Commit 197e67c5 authored by Linus Torvalds

Import 2.3.12pre7

parent 72be09b0
......@@ -55,7 +55,6 @@
unsigned long init_user_stack[1024] = { STACK_MAGIC, };
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct file * init_fd_array[NR_OPEN] = { NULL, };
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
struct mm_struct init_mm = INIT_MM(init_mm);
......
......@@ -6,7 +6,6 @@
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct file * init_fd_array[NR_OPEN] = { NULL, };
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
struct mm_struct init_mm = INIT_MM(init_mm);
......
......@@ -8,7 +8,6 @@
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct file * init_fd_array[NR_OPEN] = { NULL, };
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
struct mm_struct init_mm = INIT_MM(init_mm);
......
......@@ -40,7 +40,6 @@
*/
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct file * init_fd_array[NR_OPEN] = { NULL, };
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
struct mm_struct init_mm = INIT_MM(init_mm);
......
......@@ -6,7 +6,6 @@
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct file * init_fd_array[NR_OPEN] = { NULL, };
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
struct mm_struct init_mm = INIT_MM(init_mm);
......
......@@ -47,7 +47,6 @@ extern unsigned long _get_SP(void);
struct task_struct *last_task_used_math = NULL;
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct file * init_fd_array[NR_OPEN] = { NULL, };
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
struct mm_struct init_mm = INIT_MM(init_mm);
......
......@@ -6,7 +6,6 @@
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct file * init_fd_array[NR_OPEN] = { NULL, };
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
struct mm_struct init_mm = INIT_MM(init_mm);
......
......@@ -6,7 +6,6 @@
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct file * init_fd_array[NR_OPEN] = { NULL, };
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
struct mm_struct init_mm = INIT_MM(init_mm);
......
......@@ -866,7 +866,7 @@ asmlinkage int solaris_getmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)
SOLD("entry");
lock_kernel();
if(fd >= NR_OPEN) goto out;
if(fd >= current->files->max_fds) goto out;
filp = current->files->fd[fd];
if(!filp) goto out;
......@@ -933,7 +933,7 @@ asmlinkage int solaris_putmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)
SOLD("entry");
lock_kernel();
if(fd >= NR_OPEN) goto out;
if(fd >= current->files->max_fds) goto out;
filp = current->files->fd[fd];
if(!filp) goto out;
......
......@@ -428,6 +428,9 @@ void make_request(int major,int rw, struct buffer_head * bh)
kstat.pgpgin++;
max_req = NR_REQUEST; /* reads take precedence */
break;
case WRITERAW:
rw = WRITE;
goto do_write; /* Skip the buffer refile */
case WRITEA:
rw_ahead = 1;
rw = WRITE; /* drop into WRITE */
......@@ -435,6 +438,7 @@ void make_request(int major,int rw, struct buffer_head * bh)
if (!test_and_clear_bit(BH_Dirty, &bh->b_state))
goto end_io; /* Hmmph! Nothing to write */
refile_buffer(bh);
do_write:
/*
* We don't allow the write-requests to fill up the
* queue completely: we want some room for reads,
......@@ -641,7 +645,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bh[])
#endif
}
if ((rw == WRITE || rw == WRITEA) && is_read_only(bh[0]->b_dev)) {
if ((rw & WRITE) && is_read_only(bh[0]->b_dev)) {
printk(KERN_NOTICE "Can't write to read-only device %s\n",
kdevname(bh[0]->b_dev));
goto sorry;
......
......@@ -20,7 +20,7 @@ FONTMAPFILE = cp437.uni
L_TARGET := char.a
M_OBJS :=
L_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o
L_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o raw.o
LX_OBJS := pty.o misc.o
ifdef CONFIG_VT
......
......@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/joystick.h>
#include <linux/i2c.h>
#include <linux/raw.h>
#include <asm/uaccess.h>
#include <asm/io.h>
......@@ -602,6 +603,7 @@ __initfunc(int chr_dev_init(void))
if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
printk("unable to get major %d for memory devs\n", MEM_MAJOR);
rand_initialize();
raw_init();
#ifdef CONFIG_USB
usb_init();
#endif
......
/*
* linux/drivers/char/raw.c
*
* Front-end raw character devices. These can be bound to any block
* devices to provide genuine Unix raw character device semantics.
*
* We reserve minor number 0 for a control interface. ioctl()s on this
* device are used to bind the other minor numbers to block devices.
*/
#include <linux/fs.h>
#include <linux/iobuf.h>
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/raw.h>
#include <asm/uaccess.h>
#define dprintk(x...)
static kdev_t raw_device_bindings[256] = {};
static int raw_device_inuse[256] = {};
static int raw_device_sector_size[256] = {};
static int raw_device_sector_bits[256] = {};
extern struct file_operations * get_blkfops(unsigned int major);
static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *);
ssize_t raw_read(struct file *, char *, size_t, loff_t *);
ssize_t raw_write(struct file *, const char *, size_t, loff_t *);
int raw_open(struct inode *, struct file *);
int raw_release(struct inode *, struct file *);
int raw_ctl_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
static struct file_operations raw_fops = {
NULL, /* llseek */
raw_read, /* read */
raw_write, /* write */
NULL, /* readdir */
NULL, /* poll */
NULL, /* ioctl */
NULL, /* mmap */
raw_open, /* open */
NULL, /* flush */
raw_release, /* release */
NULL /* fsync */
};
static struct file_operations raw_ctl_fops = {
NULL, /* llseek */
NULL, /* read */
NULL, /* write */
NULL, /* readdir */
NULL, /* poll */
raw_ctl_ioctl, /* ioctl */
NULL, /* mmap */
raw_open, /* open */
NULL, /* flush */
NULL, /* no special release code */
NULL /* fsync */
};
void __init raw_init(void)
{
register_chrdev(RAW_MAJOR, "raw", &raw_fops);
}
/*
* The raw IO open and release code needs to fake appropriate
* open/release calls to the underlying block devices.
*/
static int bdev_open(kdev_t dev, int mode)
{
int err = 0;
struct file dummy_file = {};
struct dentry dummy_dentry = {};
struct inode * inode = get_empty_inode();
if (!inode)
return -ENOMEM;
dummy_file.f_op = get_blkfops(MAJOR(dev));
if (!dummy_file.f_op) {
err = -ENODEV;
goto done;
}
if (dummy_file.f_op->open) {
inode->i_rdev = dev;
dummy_dentry.d_inode = inode;
dummy_file.f_dentry = &dummy_dentry;
dummy_file.f_mode = mode;
err = dummy_file.f_op->open(inode, &dummy_file);
}
done:
iput(inode);
return err;
}
static int bdev_close(kdev_t dev)
{
int err;
struct inode * inode = get_empty_inode();
if (!inode)
return -ENOMEM;
inode->i_rdev = dev;
err = blkdev_release(inode);
iput(inode);
return err;
}
/*
* Open/close code for raw IO.
*/
int raw_open(struct inode *inode, struct file *filp)
{
int minor;
kdev_t bdev;
int err;
int sector_size;
int sector_bits;
minor = MINOR(inode->i_rdev);
/*
* Is it the control device?
*/
if (minor == 0) {
filp->f_op = &raw_ctl_fops;
return 0;
}
/*
* No, it is a normal raw device. All we need to do on open is
* to check that the device is bound, and force the underlying
* block device to a sector-size blocksize.
*/
bdev = raw_device_bindings[minor];
if (bdev == NODEV)
return -ENODEV;
err = bdev_open(bdev, filp->f_mode);
if (err)
return err;
/*
* Don't change the blocksize if we already have users using
* this device
*/
if (raw_device_inuse[minor]++)
return 0;
/*
* Don't interfere with mounted devices: we cannot safely set
* the blocksize on a device which is already mounted.
*/
sector_size = 512;
if (lookup_vfsmnt(bdev) != NULL) {
if (blksize_size[MAJOR(bdev)])
sector_size = blksize_size[MAJOR(bdev)][MINOR(bdev)];
} else {
if (hardsect_size[MAJOR(bdev)])
sector_size = hardsect_size[MAJOR(bdev)][MINOR(bdev)];
}
set_blocksize(bdev, sector_size);
raw_device_sector_size[minor] = sector_size;
for (sector_bits = 0; !(sector_size & 1); )
sector_size>>=1, sector_bits++;
raw_device_sector_bits[minor] = sector_bits;
return 0;
}
int raw_release(struct inode *inode, struct file *filp)
{
int minor;
kdev_t bdev;
minor = MINOR(inode->i_rdev);
bdev = raw_device_bindings[minor];
bdev_close(bdev);
raw_device_inuse[minor]--;
return 0;
}
/*
* Deal with ioctls against the raw-device control interface, to bind
* and unbind other raw devices.
*/
int raw_ctl_ioctl(struct inode *inode,
struct file *filp,
unsigned int command,
unsigned long arg)
{
struct raw_config_request rq;
int err = 0;
int minor;
switch (command) {
case RAW_SETBIND:
case RAW_GETBIND:
/* First, find out which raw minor we want */
err = copy_from_user(&rq, (void *) arg, sizeof(rq));
if (err)
break;
minor = rq.raw_minor;
if (minor == 0 || minor > MINORMASK) {
err = -EINVAL;
break;
}
if (command == RAW_SETBIND) {
/*
* For now, we don't need to check that the underlying
* block device is present or not: we can do that when
* the raw device is opened. Just check that the
* major/minor numbers make sense.
*/
if (rq.block_major == NODEV ||
rq.block_major > MAX_BLKDEV ||
rq.block_minor > MINORMASK) {
err = -EINVAL;
break;
}
if (raw_device_inuse[minor]) {
err = -EBUSY;
break;
}
raw_device_bindings[minor] =
MKDEV(rq.block_major, rq.block_minor);
} else {
rq.block_major = MAJOR(raw_device_bindings[minor]);
rq.block_minor = MINOR(raw_device_bindings[minor]);
err = copy_to_user((void *) arg, &rq, sizeof(rq));
}
break;
default:
err = -EINVAL;
}
return err;
}
ssize_t raw_read(struct file *filp, char * buf,
size_t size, loff_t *offp)
{
return rw_raw_dev(READ, filp, buf, size, offp);
}
ssize_t raw_write(struct file *filp, const char *buf,
size_t size, loff_t *offp)
{
return rw_raw_dev(WRITE, filp, (char *) buf, size, offp);
}
#define SECTOR_BITS 9
#define SECTOR_SIZE (1U << SECTOR_BITS)
#define SECTOR_MASK (SECTOR_SIZE - 1)
ssize_t rw_raw_dev(int rw, struct file *filp, char *buf,
size_t size, loff_t *offp)
{
struct kiobuf * iobuf;
int err;
unsigned long blocknr, blocks;
unsigned long b[KIO_MAX_SECTORS];
size_t transferred;
int iosize;
int i;
int minor;
kdev_t dev;
unsigned long limit;
int sector_size, sector_bits, sector_mask;
int max_sectors;
/*
* First, a few checks on device size limits
*/
minor = MINOR(filp->f_dentry->d_inode->i_rdev);
dev = raw_device_bindings[minor];
sector_size = raw_device_sector_size[minor];
sector_bits = raw_device_sector_bits[minor];
sector_mask = sector_size - 1;
max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9);
if (blk_size[MAJOR(dev)])
limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits;
else
limit = INT_MAX;
dprintk ("rw_raw_dev: dev %d:%d (+%d)\n",
MAJOR(dev), MINOR(dev), limit);
if ((*offp & sector_mask) || (size & sector_mask))
return -EINVAL;
if ((*offp >> sector_bits) > limit)
return 0;
/*
* We'll just use one kiobuf
*/
err = alloc_kiovec(1, &iobuf);
if (err)
return err;
/*
* Split the IO into KIO_MAX_SECTORS chunks, mapping and
* unmapping the single kiobuf as we go to perform each chunk of
* IO.
*/
transferred = 0;
blocknr = *offp >> sector_bits;
while (size > 0) {
blocks = size >> sector_bits;
if (blocks > max_sectors)
blocks = max_sectors;
if (blocks > limit - blocknr)
blocks = limit - blocknr;
if (!blocks)
break;
iosize = blocks << sector_bits;
err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
if (err)
break;
for (i=0; i < blocks; i++)
b[i] = blocknr++;
err = brw_kiovec(rw, 1, &iobuf, dev, b, sector_size, 0);
if (err >= 0) {
transferred += err;
size -= err;
buf += err;
}
unmap_kiobuf(iobuf);
if (err != iosize)
break;
}
free_kiovec(1, &iobuf);
if (transferred) {
*offp += transferred;
return transferred;
}
return err;
}
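As a quick illustration of the alignment rule enforced above (rw_raw_dev returns -EINVAL unless both the file offset and the transfer size are multiples of the sector size), here is a minimal user-space sketch; the /dev/raw1 node name is an assumption, not part of this patch.
/* Hedged sketch: one aligned 512-byte read from a bound raw device.
   The node /dev/raw1 (RAW_MAJOR, minor 1) is assumed to exist; only
   the offset/size alignment rule comes from rw_raw_dev() above. */
#include <fcntl.h>
#include <unistd.h>
int main(void)
{
	static char buf[512];			/* one sector's worth */
	int fd = open("/dev/raw1", O_RDONLY);
	if (fd < 0)
		return 1;
	/* offset 0 and size 512 are both sector-aligned, so this passes
	   the (*offp & sector_mask) || (size & sector_mask) check */
	if (read(fd, buf, 512) != 512)
		return 1;
	close(fd);
	return 0;
}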
......@@ -432,38 +432,14 @@ int usb_hub_init(void)
void usb_hub_cleanup(void)
{
struct list_head *next, *tmp, *head = &all_hubs_list;
struct usb_hub *hub;
unsigned long flags, flags2;
/* Free the resources allocated by each hub */
spin_lock_irqsave(&hub_list_lock, flags);
spin_lock_irqsave(&hub_event_lock, flags2);
tmp = head->next;
while (tmp != head) {
hub = list_entry(tmp, struct usb_hub, hub_list);
next = tmp->next;
list_del(&hub->event_list);
INIT_LIST_HEAD(&hub->event_list);
list_del(tmp); /* &hub->hub_list */
INIT_LIST_HEAD(tmp); /* &hub->hub_list */
/* XXX we should disconnect each connected port here */
usb_release_irq(hub->dev, hub->irq_handle);
hub->irq_handle = NULL;
kfree(hub);
tmp = next;
}
/*
* Hub resources are freed for us by usb_deregister. It calls
* usb_driver_purge on every device, which in turn calls that
* device's disconnect function if it is using this driver.
* The hub_disconnect function takes care of releasing the
* individual hub resources. -greg
*/
usb_deregister(&hub_driver);
spin_unlock_irqrestore(&hub_event_lock, flags2);
spin_unlock_irqrestore(&hub_list_lock, flags);
} /* usb_hub_cleanup() */
#ifdef MODULE
......
......@@ -13,7 +13,7 @@ O_TARGET := fs.o
O_OBJS = open.o read_write.o devices.o file_table.o buffer.o \
super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
ioctl.o readdir.o select.o fifo.o locks.o filesystems.o \
dcache.o inode.o attr.o bad_inode.o $(BINFMTS)
dcache.o inode.o attr.o bad_inode.o file.o iobuf.o $(BINFMTS)
MOD_LIST_NAME := FS_MODULES
ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \
......
......@@ -40,6 +40,7 @@
#include <linux/file.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/iobuf.h>
#include <asm/uaccess.h>
#include <asm/io.h>
......@@ -1527,6 +1528,221 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long
return err;
}
/*
* IO completion routine for a buffer_head being used for kiobuf IO: we
* can't dispatch the kiobuf callback until io_count reaches 0.
*/
static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate)
{
struct kiobuf *kiobuf;
mark_buffer_uptodate(bh, uptodate);
kiobuf = bh->b_kiobuf;
if (atomic_dec_and_test(&kiobuf->io_count))
kiobuf->end_io(kiobuf);
if (!uptodate)
kiobuf->errno = -EIO;
}
/*
* For brw_kiovec: submit a set of buffer_head temporary IOs and wait
* for them to complete. Clean up the buffer_heads afterwards.
*/
#define dprintk(x...)
static int do_kio(struct kiobuf *kiobuf,
int rw, int nr, struct buffer_head *bh[], int size)
{
int iosize;
int i;
struct buffer_head *tmp;
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
dprintk ("do_kio start %d\n", rw);
if (rw == WRITE)
rw = WRITERAW;
atomic_add(nr, &kiobuf->io_count);
kiobuf->errno = 0;
ll_rw_block(rw, nr, bh);
kiobuf_wait_for_io(kiobuf);
spin_lock(&unused_list_lock);
iosize = 0;
for (i = nr; --i >= 0; ) {
iosize += size;
tmp = bh[i];
if (!buffer_uptodate(tmp)) {
/* We are traversing bh'es in reverse order so
clearing iosize on error calculates the
amount of IO before the first error. */
iosize = 0;
}
__put_unused_buffer_head(tmp);
}
spin_unlock(&unused_list_lock);
dprintk ("do_kio end %d %d\n", iosize, err);
if (iosize)
return iosize;
if (kiobuf->errno)
return kiobuf->errno;
return -EIO;
}
/*
* Start I/O on a physical range of kernel memory, defined by a vector
* of kiobuf structs (much like a user-space iovec list).
*
* The kiobuf must already be locked for IO. IO is submitted
* asynchronously: you need to check page->locked, page->uptodate, and
* maybe wait on page->wait.
*
* It is up to the caller to make sure that there are enough blocks
* passed in to completely map the iobufs to disk.
*/
int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
kdev_t dev, unsigned long b[], int size, int bmap)
{
int err;
int length;
int transferred;
int i;
int bufind;
int pageind;
int bhind;
int offset;
unsigned long blocknr;
struct kiobuf * iobuf = NULL;
unsigned long page;
struct page * map;
struct buffer_head *tmp, *bh[KIO_MAX_SECTORS];
if (!nr)
return 0;
/*
* First, do some alignment and validity checks
*/
for (i = 0; i < nr; i++) {
iobuf = iovec[i];
if ((iobuf->offset & (size-1)) ||
(iobuf->length & (size-1)))
return -EINVAL;
if (!iobuf->locked)
panic("brw_kiovec: iobuf not locked for I/O");
if (!iobuf->nr_pages)
panic("brw_kiovec: iobuf not initialised");
}
/* DEBUG */
#if 0
return iobuf->length;
#endif
dprintk ("brw_kiovec: start\n");
/*
* OK to walk down the iovec doing page IO on each page we find.
*/
bufind = bhind = transferred = err = 0;
for (i = 0; i < nr; i++) {
iobuf = iovec[i];
offset = iobuf->offset;
length = iobuf->length;
dprintk ("iobuf %d %d %d\n", offset, length, size);
for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
page = iobuf->pagelist[pageind];
map = iobuf->maplist[pageind];
while (length > 0) {
blocknr = b[bufind++];
tmp = get_unused_buffer_head(0);
if (!tmp) {
err = -ENOMEM;
goto error;
}
tmp->b_dev = B_FREE;
tmp->b_size = size;
tmp->b_data = (char *) (page + offset);
tmp->b_this_page = tmp;
init_buffer(tmp, end_buffer_io_kiobuf, NULL);
tmp->b_dev = dev;
tmp->b_blocknr = blocknr;
tmp->b_state = 1 << BH_Mapped;
tmp->b_kiobuf = iobuf;
if (rw == WRITE) {
set_bit(BH_Uptodate, &tmp->b_state);
set_bit(BH_Dirty, &tmp->b_state);
}
dprintk ("buffer %d (%d) at %p\n",
bhind, tmp->b_blocknr, tmp->b_data);
bh[bhind++] = tmp;
length -= size;
offset += size;
/*
* Start the IO if we have got too much
*/
if (bhind >= KIO_MAX_SECTORS) {
err = do_kio(iobuf, rw, bhind, bh, size);
if (err >= 0)
transferred += err;
else
goto finished;
bhind = 0;
}
if (offset >= PAGE_SIZE) {
offset = 0;
break;
}
} /* End of block loop */
} /* End of page loop */
} /* End of iovec loop */
/* Is there any IO still left to submit? */
if (bhind) {
err = do_kio(iobuf, rw, bhind, bh, size);
if (err >= 0)
transferred += err;
else
goto finished;
}
finished:
dprintk ("brw_kiovec: end (%d, %d)\n", transferred, err);
if (transferred)
return transferred;
return err;
error:
/* We got an error allocating the bh'es. Just free the current
buffer_heads and exit. */
spin_lock(&unused_list_lock);
for (i = bhind; --i >= 0; ) {
__put_unused_buffer_head(bh[i]);
}
spin_unlock(&unused_list_lock);
goto finished;
}
/*
* Start I/O on a page.
* This function expects the page to be locked and may return
......
......@@ -449,9 +449,9 @@ static inline void flush_old_files(struct files_struct * files)
unsigned long set, i;
i = j * __NFDBITS;
if (i >= files->max_fds)
if (i >= files->max_fds || i >= files->max_fdset)
break;
set = xchg(&files->close_on_exec.fds_bits[j], 0);
set = xchg(&files->close_on_exec->fds_bits[j], 0);
j++;
for ( ; set ; i++,set >>= 1) {
if (set & 1)
......
......@@ -12,36 +12,89 @@
extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg);
static inline int dupfd(struct file *file, unsigned int arg)
/*
* locate_fd finds a free file descriptor in the open_fds fdset,
* expanding the fd arrays if necessary. The files write lock will be
* held on exit to ensure that the fd can be entered atomically.
*/
static inline int locate_fd(struct files_struct *files,
struct file *file, int start)
{
struct files_struct * files = current->files;
unsigned int newfd;
int error;
error = -EMFILE;
write_lock(&files->file_lock);
arg = find_next_zero_bit(&files->open_fds, NR_OPEN, arg);
if (arg >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out_putf;
FD_SET(arg, &files->open_fds);
FD_CLR(arg, &files->close_on_exec);
write_unlock(&files->file_lock);
fd_install(arg, file);
error = arg;
repeat:
error = -EMFILE;
if (start < files->next_fd)
start = files->next_fd;
if (start >= files->max_fdset) {
expand:
error = expand_files(files, start);
if (error < 0)
goto out;
goto repeat;
}
newfd = find_next_zero_bit(files->open_fds->fds_bits,
files->max_fdset, start);
error = -EMFILE;
if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
if (newfd >= files->max_fdset)
goto expand;
error = expand_files(files, newfd);
if (error < 0)
goto out;
if (error) /* If we might have blocked, try again. */
goto repeat;
if (start <= files->next_fd)
files->next_fd = newfd + 1;
error = newfd;
out:
return error;
}
static inline void allocate_fd(struct files_struct *files,
struct file *file, int fd)
{
FD_SET(fd, files->open_fds);
FD_CLR(fd, files->close_on_exec);
write_unlock(&files->file_lock);
fd_install(fd, file);
}
static int dupfd(struct file *file, int start)
{
struct files_struct * files = current->files;
int ret;
ret = locate_fd(files, file, start);
if (ret < 0)
goto out_putf;
allocate_fd(files, file, ret);
return ret;
out_putf:
write_unlock(&files->file_lock);
fput(file);
goto out;
return ret;
}
asmlinkage int sys_dup2(unsigned int oldfd, unsigned int newfd)
{
int err = -EBADF;
struct file * file;
struct files_struct * files = current->files;
read_lock(&current->files->file_lock);
write_lock(&current->files->file_lock);
if (!(file = fcheck(oldfd)))
goto out_unlock;
err = newfd;
......@@ -50,15 +103,33 @@ asmlinkage int sys_dup2(unsigned int oldfd, unsigned int newfd)
err = -EBADF;
if (newfd >= NR_OPEN)
goto out_unlock; /* following POSIX.1 6.2.1 */
get_file(file);
read_unlock(&current->files->file_lock);
get_file(file); /* We are now finished with oldfd */
err = expand_files(files, newfd);
if (err < 0) {
write_unlock(&files->file_lock);
fput(file);
goto out;
}
/* To avoid races with open() and dup(), we will mark the fd as
* in-use in the open-file bitmap throughout the entire dup2()
* process. This is quite safe: do_close() uses the fd array
* entry, not the bitmap, to decide what work needs to be
* done. --sct */
FD_SET(newfd, files->open_fds);
write_unlock(&files->file_lock);
do_close(newfd, 0);
write_lock(&files->file_lock);
allocate_fd(files, file, newfd);
err = newfd;
sys_close(newfd);
err = dupfd(file, newfd);
out:
return err;
out_unlock:
read_unlock(&current->files->file_lock);
write_unlock(&current->files->file_lock);
goto out;
}
......@@ -66,6 +137,7 @@ asmlinkage int sys_dup(unsigned int fildes)
{
int ret = -EBADF;
struct file * file = fget(fildes);
if (file)
ret = dupfd(file, 0);
return ret;
......@@ -118,13 +190,13 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
}
break;
case F_GETFD:
err = FD_ISSET(fd, &current->files->close_on_exec);
err = FD_ISSET(fd, current->files->close_on_exec);
break;
case F_SETFD:
if (arg&1)
FD_SET(fd, &current->files->close_on_exec);
FD_SET(fd, current->files->close_on_exec);
else
FD_CLR(fd, &current->files->close_on_exec);
FD_CLR(fd, current->files->close_on_exec);
break;
case F_GETFL:
err = filp->f_flags;
......@@ -152,7 +224,6 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
err = filp->f_owner.pid;
break;
case F_SETOWN:
err = 0;
filp->f_owner.pid = arg;
filp->f_owner.uid = current->uid;
filp->f_owner.euid = current->euid;
......@@ -172,10 +243,9 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
break;
default:
/* sockets need a few special fcntls. */
err = -EINVAL;
if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
err = sock_fcntl (filp, cmd, arg);
else
err = -EINVAL;
break;
}
fput(filp);
......
/*
* linux/fs/open.c
*
* Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
*
* Manage the dynamic fd arrays in the process files_struct.
*/
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/malloc.h>
#include <linux/vmalloc.h>
#include <asm/bitops.h>
/*
* Allocate an fd array, using get_free_page() if possible.
* Note: the array isn't cleared at allocation time.
*/
struct file ** alloc_fd_array(int num)
{
struct file **new_fds;
int size = num * sizeof(struct file *);
if (size < PAGE_SIZE)
new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
else if (size == PAGE_SIZE)
new_fds = (struct file **) __get_free_page(GFP_KERNEL);
else
new_fds = (struct file **) vmalloc(size);
return new_fds;
}
void free_fd_array(struct file **array, int num)
{
int size = num * sizeof(struct file *);
if (!array) {
printk (KERN_ERR __FUNCTION__ "array = 0 (num = %d)\n", num);
return;
}
if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */
return;
else if (size < PAGE_SIZE)
kfree(array);
else if (size == PAGE_SIZE)
free_page((unsigned long) array);
else
vfree(array);
}
/*
* Expand the fd array in the files_struct. Called with the files
* spinlock held for write.
*/
int expand_fd_array(struct files_struct *files, int nr)
{
struct file **new_fds;
int error, nfds;
error = -EMFILE;
if (files->max_fds >= NR_OPEN || nr > NR_OPEN)
goto out;
nfds = files->max_fds;
write_unlock(&files->file_lock);
/*
* Expand to the max in easy steps, and keep expanding it until
* we have enough for the requested fd array size.
*/
do {
#if NR_OPEN_DEFAULT < 256
if (nfds < 256)
nfds = 256;
else
#endif
if (nfds < (PAGE_SIZE / sizeof(struct file *)))
nfds = PAGE_SIZE / sizeof(struct file *);
else {
nfds = nfds * 2;
if (nfds > NR_OPEN)
nfds = NR_OPEN;
}
} while (nfds < nr);
error = -ENOMEM;
new_fds = alloc_fd_array(nfds);
write_lock(&files->file_lock);
if (!new_fds)
goto out;
/* Copy the existing array and install the new pointer */
if (nfds > files->max_fds) {
struct file **old_fds;
int i;
old_fds = xchg(&files->fd, new_fds);
i = xchg(&files->max_fds, nfds);
/* Don't copy/clear the array if we are creating a new
fd array for fork() */
if (i) {
memcpy(new_fds, old_fds, i * sizeof(struct file *));
/* clear the remainder of the array */
memset(&new_fds[i], 0,
(nfds-i) * sizeof(struct file *));
write_unlock(&files->file_lock);
free_fd_array(old_fds, i);
write_lock(&files->file_lock);
}
} else {
/* Somebody expanded the array while we slept ... */
write_unlock(&files->file_lock);
free_fd_array(new_fds, nfds);
write_lock(&files->file_lock);
}
error = 0;
out:
return error;
}
/*
* Allocate an fdset array, using get_free_page() if possible.
* Note: the array isn't cleared at allocation time.
*/
fd_set * alloc_fdset(int num)
{
fd_set *new_fdset;
int size = num / 8;
if (size < PAGE_SIZE)
new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
else if (size == PAGE_SIZE)
new_fdset = (fd_set *) __get_free_page(GFP_KERNEL);
else
new_fdset = (fd_set *) vmalloc(size);
return new_fdset;
}
void free_fdset(fd_set *array, int num)
{
int size = num / 8;
if (!array) {
printk (KERN_ERR __FUNCTION__ "array = 0 (num = %d)\n", num);
return;
}
if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */
return;
else if (size < PAGE_SIZE)
kfree(array);
else if (size == PAGE_SIZE)
free_page((unsigned long) array);
else
vfree(array);
}
/*
* Expand the fdset in the files_struct. Called with the files spinlock
* held for write.
*/
int expand_fdset(struct files_struct *files, int nr)
{
fd_set *new_openset = 0, *new_execset = 0;
int error, nfds = 0;
error = -EMFILE;
if (files->max_fdset >= NR_OPEN || nr > NR_OPEN)
goto out;
nfds = files->max_fdset;
write_unlock(&files->file_lock);
/* Expand to the max in easy steps */
do {
if (nfds < (PAGE_SIZE * 8))
nfds = PAGE_SIZE * 8;
else {
nfds = nfds * 2;
if (nfds > NR_OPEN)
nfds = NR_OPEN;
}
} while (nfds < nr);
error = -ENOMEM;
new_openset = alloc_fdset(nfds);
new_execset = alloc_fdset(nfds);
write_lock(&files->file_lock);
if (!new_openset || !new_execset)
goto out;
error = 0;
/* Copy the existing tables and install the new pointers */
if (nfds > files->max_fdset) {
int i = files->max_fdset / (sizeof(unsigned long) * 8);
int count = (nfds - files->max_fdset) / 8;
/*
* Don't copy the entire array if the current fdset is
* not yet initialised.
*/
if (i) {
memcpy (new_openset, files->open_fds, files->max_fdset/8);
memcpy (new_execset, files->close_on_exec, files->max_fdset/8);
memset (&new_openset->fds_bits[i], 0, count);
memset (&new_execset->fds_bits[i], 0, count);
}
nfds = xchg(&files->max_fdset, nfds);
new_openset = xchg(&files->open_fds, new_openset);
new_execset = xchg(&files->close_on_exec, new_execset);
write_unlock(&files->file_lock);
free_fdset (new_openset, nfds);
free_fdset (new_execset, nfds);
write_lock(&files->file_lock);
return 0;
}
/* Somebody expanded the array while we slept ... */
out:
write_unlock(&files->file_lock);
if (new_openset)
free_fdset(new_openset, nfds);
if (new_execset)
free_fdset(new_execset, nfds);
write_lock(&files->file_lock);
return error;
}
/*
* iobuf.c
*
* Keep track of the general-purpose IO-buffer structures used to track
* abstract kernel-space io buffers.
*
*/
#include <linux/iobuf.h>
#include <linux/malloc.h>
#include <linux/slab.h>
static kmem_cache_t *kiobuf_cachep;
/*
* The default IO completion routine for kiobufs: just wake up
* the kiobuf, nothing more.
*/
void simple_wakeup_kiobuf(struct kiobuf *kiobuf)
{
wake_up(&kiobuf->wait_queue);
}
void __init kiobuf_init(void)
{
kiobuf_cachep = kmem_cache_create("kiobuf",
sizeof(struct kiobuf),
0,
SLAB_HWCACHE_ALIGN, NULL, NULL);
if(!kiobuf_cachep)
panic("Cannot create kernel iobuf cache\n");
}
int alloc_kiovec(int nr, struct kiobuf **bufp)
{
int i;
struct kiobuf *iobuf;
for (i = 0; i < nr; i++) {
iobuf = kmem_cache_alloc(kiobuf_cachep, SLAB_KERNEL);
if (!iobuf) {
free_kiovec(i, bufp);
return -ENOMEM;
}
memset(iobuf, 0, sizeof(*iobuf));
init_waitqueue_head(&iobuf->wait_queue);
iobuf->end_io = simple_wakeup_kiobuf;
iobuf->array_len = KIO_STATIC_PAGES;
iobuf->pagelist = iobuf->page_array;
iobuf->maplist = iobuf->map_array;
*bufp++ = iobuf;
}
return 0;
}
void free_kiovec(int nr, struct kiobuf **bufp)
{
int i;
struct kiobuf *iobuf;
for (i = 0; i < nr; i++) {
iobuf = bufp[i];
if (iobuf->array_len > KIO_STATIC_PAGES) {
kfree (iobuf->pagelist);
kfree (iobuf->maplist);
}
kmem_cache_free(kiobuf_cachep, bufp[i]);
}
}
int expand_kiobuf(struct kiobuf *iobuf, int wanted)
{
unsigned long * pagelist;
struct page ** maplist;
if (iobuf->array_len >= wanted)
return 0;
pagelist = (unsigned long *)
kmalloc(wanted * sizeof(unsigned long), GFP_KERNEL);
if (!pagelist)
return -ENOMEM;
maplist = (struct page **)
kmalloc(wanted * sizeof(struct page **), GFP_KERNEL);
if (!maplist) {
kfree(pagelist);
return -ENOMEM;
}
/* Did it grow while we waited? */
if (iobuf->array_len >= wanted) {
kfree(pagelist);
kfree(maplist);
return 0;
}
memcpy (pagelist, iobuf->pagelist, iobuf->array_len * sizeof(unsigned long));
memcpy (maplist, iobuf->maplist, iobuf->array_len * sizeof(struct page **));
if (iobuf->array_len > KIO_STATIC_PAGES) {
kfree (iobuf->pagelist);
kfree (iobuf->maplist);
}
iobuf->pagelist = pagelist;
iobuf->maplist = maplist;
iobuf->array_len = wanted;
return 0;
}
void kiobuf_wait_for_io(struct kiobuf *kiobuf)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
add_wait_queue(&kiobuf->wait_queue, &wait);
repeat:
tsk->state = TASK_UNINTERRUPTIBLE;
run_task_queue(&tq_disk);
if (atomic_read(&kiobuf->io_count) != 0) {
schedule();
goto repeat;
}
tsk->state = TASK_RUNNING;
remove_wait_queue(&kiobuf->wait_queue, &wait);
}
......@@ -61,11 +61,11 @@ asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
lock_kernel();
switch (cmd) {
case FIOCLEX:
FD_SET(fd, &current->files->close_on_exec);
FD_SET(fd, current->files->close_on_exec);
break;
case FIONCLEX:
FD_CLR(fd, &current->files->close_on_exec);
FD_CLR(fd, current->files->close_on_exec);
break;
case FIONBIO:
......
......@@ -685,10 +685,14 @@ int get_unused_fd(void)
struct files_struct * files = current->files;
int fd, error;
error = -EMFILE;
write_lock(&files->file_lock);
fd = find_first_zero_bit(&files->open_fds, NR_OPEN);
repeat:
fd = find_next_zero_bit(files->open_fds,
current->files->max_fdset,
files->next_fd);
/*
* N.B. For clone tasks sharing a files structure, this test
* will limit the total number of files that can be opened.
......@@ -696,10 +700,31 @@ int get_unused_fd(void)
if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
/* Check here for fd > files->max_fds to do dynamic expansion */
/* Do we need to expand the fdset array? */
if (fd >= current->files->max_fdset) {
error = expand_fdset(files, 0);
if (!error) {
error = -EMFILE;
goto repeat;
}
goto out;
}
/*
* Check whether we need to expand the fd array.
*/
if (fd >= files->max_fds) {
error = expand_fd_array(files, 0);
if (!error) {
error = -EMFILE;
goto repeat;
}
goto out;
}
FD_SET(fd, &files->open_fds);
FD_CLR(fd, &files->close_on_exec);
FD_SET(fd, files->open_fds);
FD_CLR(fd, files->close_on_exec);
files->next_fd = fd + 1;
#if 1
/* Sanity check */
if (files->fd[fd] != NULL) {
......@@ -717,7 +742,9 @@ int get_unused_fd(void)
inline void put_unused_fd(unsigned int fd)
{
write_lock(&current->files->file_lock);
FD_CLR(fd, &current->files->open_fds);
FD_CLR(fd, current->files->open_fds);
if (fd < current->files->next_fd)
current->files->next_fd = fd;
write_unlock(&current->files->file_lock);
}
......@@ -790,8 +817,12 @@ int filp_close(struct file *filp, fl_owner_t id)
* Careful here! We test whether the file pointer is NULL before
* releasing the fd. This ensures that one clone task can't release
* an fd while another clone is opening it.
*
* The "release" argument tells us whether or not to mark the fd as free
* or not in the open-files bitmap. dup2 uses this to retain the fd
* without races.
*/
asmlinkage int sys_close(unsigned int fd)
int do_close(unsigned int fd, int release)
{
int error;
struct file * filp;
......@@ -802,9 +833,10 @@ asmlinkage int sys_close(unsigned int fd)
filp = frip(fd);
if (!filp)
goto out_unlock;
FD_CLR(fd, &files->close_on_exec);
FD_CLR(fd, files->close_on_exec);
write_unlock(&files->file_lock);
put_unused_fd(fd);
if (release)
put_unused_fd(fd);
lock_kernel();
error = filp_close(filp, files);
unlock_kernel();
......@@ -815,6 +847,11 @@ asmlinkage int sys_close(unsigned int fd)
goto out;
}
asmlinkage int sys_close(unsigned int fd)
{
return do_close(fd, 1);
}
/*
* This routine simulates a hangup on the tty, to arrange that users
* are given clean terminals at login time.
......
......@@ -725,11 +725,13 @@ static inline char * task_state(struct task_struct *p, char *buffer)
"PPid:\t%d\n"
"Uid:\t%d\t%d\t%d\t%d\n"
"Gid:\t%d\t%d\t%d\t%d\n"
"FDSize:\t%d\n"
"Groups:\t",
get_task_state(p),
p->pid, p->p_pptr->pid,
p->uid, p->euid, p->suid, p->fsuid,
p->gid, p->egid, p->sgid, p->fsgid);
p->gid, p->egid, p->sgid, p->fsgid,
p->files ? p->files->max_fds : 0);
for (g = 0; g < p->ngroups; g++)
buffer += sprintf(buffer, "%d ", p->groups[g]);
......
......@@ -106,7 +106,7 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
/* handle last in-complete long-word first */
set = ~(~0UL << (n & (__NFDBITS-1)));
n /= __NFDBITS;
open_fds = current->files->open_fds.fds_bits+n;
open_fds = current->files->open_fds->fds_bits+n;
max = 0;
if (set) {
set &= BITS(fds, n);
......@@ -268,8 +268,8 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
if (n < 0)
goto out_nofds;
if (n > KFDS_NR)
n = KFDS_NR;
if (n > current->files->max_fdset + 1)
n = current->files->max_fdset + 1;
/*
* We need 6 bitmaps (in/out/ex for both incoming and outgoing),
......@@ -277,7 +277,7 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
* long-words.
*/
ret = -ENOMEM;
size = FDS_BYTES(n);
size = (n + 8 * sizeof(long) - 1) / (8 * sizeof(long)) * sizeof(long);
bits = kmalloc(6 * size, GFP_KERNEL);
if (!bits)
goto out_nofds;
......@@ -380,7 +380,7 @@ asmlinkage int sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
lock_kernel();
/* Do a sanity check on nfds ... */
err = -EINVAL;
if (nfds > NR_OPEN)
if (nfds > current->files->max_fds)
goto out;
if (timeout) {
......
......@@ -28,7 +28,7 @@
{_STK_LIM, _STK_LIM}, /* RLIMIT_STACK */ \
{ 0, LONG_MAX}, /* RLIMIT_CORE */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_RSS */ \
{ NR_OPEN, NR_OPEN}, /* RLIMIT_NOFILE */ \
{INR_OPEN, INR_OPEN}, /* RLIMIT_NOFILE */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_AS */ \
{MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, /* RLIMIT_NPROC */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_MEMLOCK */ \
......
......@@ -29,7 +29,7 @@
{ 0, LONG_MAX }, \
{ LONG_MAX, LONG_MAX }, \
{ MAX_TASKS_PER_USER, MAX_TASKS_PER_USER }, \
{ NR_OPEN, NR_OPEN }, \
{ INR_OPEN, INR_OPEN }, \
{ LONG_MAX, LONG_MAX }, \
{ LONG_MAX, LONG_MAX }, \
}
......
......@@ -29,7 +29,7 @@
{ 0, LONG_MAX }, \
{ LONG_MAX, LONG_MAX }, \
{ 0, 0 }, \
{ NR_OPEN, NR_OPEN }, \
{ INR_OPEN, INR_OPEN }, \
{ LONG_MAX, LONG_MAX }, \
{ LONG_MAX, LONG_MAX }, \
}
......
......@@ -29,7 +29,7 @@
{ 0, LONG_MAX}, \
{LONG_MAX, LONG_MAX}, \
{MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, \
{NR_OPEN, NR_OPEN}, \
{INR_OPEN, INR_OPEN}, \
{LONG_MAX, LONG_MAX}, \
{LONG_MAX, LONG_MAX} \
}
......
......@@ -34,7 +34,7 @@
{ LONG_MAX, LONG_MAX }, \
{ _STK_LIM, LONG_MAX }, \
{ 0, LONG_MAX }, \
{ NR_OPEN, NR_OPEN }, \
{ INR_OPEN, INR_OPEN }, \
{ LONG_MAX, LONG_MAX }, \
{ LONG_MAX, LONG_MAX }, \
{ MAX_TASKS_PER_USER, MAX_TASKS_PER_USER }, \
......
......@@ -25,7 +25,7 @@
{ 0, LONG_MAX}, /* RLIMIT_CORE */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_RSS */ \
{MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, /* RLIMIT_NPROC */ \
{ NR_OPEN, NR_OPEN}, /* RLIMIT_NOFILE */ \
{INR_OPEN, INR_OPEN}, /* RLIMIT_NOFILE */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_MEMLOCK */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_AS */ \
}
......
......@@ -31,7 +31,7 @@
{LONG_MAX, LONG_MAX}, {LONG_MAX, LONG_MAX}, \
{LONG_MAX, LONG_MAX}, {_STK_LIM, LONG_MAX}, \
{ 0, LONG_MAX}, {LONG_MAX, LONG_MAX}, \
{NR_OPEN, NR_OPEN}, {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, \
{INR_OPEN, INR_OPEN}, {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, \
{LONG_MAX, LONG_MAX}, {LONG_MAX, LONG_MAX} \
}
......
......@@ -30,7 +30,7 @@
{LONG_MAX, LONG_MAX}, {LONG_MAX, LONG_MAX}, \
{LONG_MAX, LONG_MAX}, {_STK_LIM, LONG_MAX}, \
{ 0, LONG_MAX}, {LONG_MAX, LONG_MAX}, \
{NR_OPEN, NR_OPEN}, {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, \
{INR_OPEN, INR_OPEN}, {MAX_TASKS_PER_USER, MAX_TASKS_PER_USER}, \
{LONG_MAX, LONG_MAX}, {LONG_MAX, LONG_MAX} \
}
......
......@@ -55,18 +55,6 @@ extern inline struct file * fget(unsigned int fd)
return file;
}
/*
* Install a file pointer in the fd array.
*/
extern inline void fd_install(unsigned int fd, struct file * file)
{
struct files_struct *files = current->files;
write_lock(&files->file_lock);
files->fd[fd] = file;
write_unlock(&files->file_lock);
}
/*
* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
*
......@@ -90,4 +78,26 @@ extern inline void fput(struct file * file)
}
extern void put_filp(struct file *);
/*
* Install a file pointer in the fd array.
*
* The VFS is full of places where we drop the files lock between
* setting the open_fds bitmap and installing the file in the file
* array. At any such point, we are vulnerable to a dup2() race
* installing a file in the array before us. We need to detect this and
* fput() the struct file we are about to overwrite in this case.
*/
extern inline void fd_install(unsigned int fd, struct file * file)
{
struct files_struct *files = current->files;
struct file * result;
write_lock(&files->file_lock);
result = xchg(&files->fd[fd], file);
write_unlock(&files->file_lock);
if (result)
fput(result);
}
#endif /* __LINUX_FILE_H */
......@@ -27,17 +27,19 @@ struct poll_table_struct;
/*
* It's silly to have NR_OPEN bigger than NR_FILE, but I'll fix
* that later. Anyway, now the file code is no longer dependent
* on bitmaps in unsigned longs, but uses the new fd_set structure..
* It's silly to have NR_OPEN bigger than NR_FILE, but you can change
* the file limit at runtime and only root can increase the per-process
* nr_file rlimit, so it's safe to set up a ridiculously high absolute
* upper limit on files-per-process.
*
* Some programs (notably those using select()) may have to be
* recompiled to take full advantage of the new limits..
*/
/* Fixed constants first: */
#undef NR_OPEN
#define NR_OPEN 1024
#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */
#define INR_OPEN 1024 /* Initial setting for nfile rlimits */
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
......@@ -62,6 +64,7 @@ extern int max_super_blocks, nr_super_blocks;
#define WRITE 1
#define READA 2 /* read-ahead - don't block if no resources */
#define WRITEA 3 /* write-ahead - don't block if no resources */
#define WRITERAW 5 /* raw write - don't play with buffer lists */
#ifndef NULL
#define NULL ((void *) 0)
......@@ -228,6 +231,7 @@ struct buffer_head {
unsigned long b_rsector; /* Real buffer location on disk */
wait_queue_head_t b_wait;
struct kiobuf * b_kiobuf; /* kiobuf which owns this IO */
};
typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
......@@ -704,6 +708,7 @@ extern inline int locks_verify_area(int read_write, struct inode *inode,
asmlinkage int sys_open(const char *, int, int);
asmlinkage int sys_close(unsigned int); /* yes, it's really unsigned */
extern int do_close(unsigned int, int); /* yes, it's really unsigned */
extern int do_truncate(struct dentry *, unsigned long);
extern int get_unused_fd(void);
extern void put_unused_fd(unsigned int);
......
/*
* iobuf.h
*
* Defines the structures used to track abstract kernel-space io buffers.
*
*/
#ifndef __LINUX_IOBUF_H
#define __LINUX_IOBUF_H
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/wait.h>
#include <asm/atomic.h>
/*
* The kiobuf structure describes a physical set of pages reserved
* and locked for IO. The reference counts on each page will have been
* incremented, and the flags field will indicate whether or not we have
* pre-locked all of the pages for IO.
*
* kiobufs may be passed in arrays to form a kiovec, but we must
* preserve the property that no page is present more than once over the
* entire iovec.
*/
#define KIO_MAX_ATOMIC_IO 64 /* in kb */
#define KIO_MAX_ATOMIC_BYTES (64 * 1024)
#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2)
struct kiobuf
{
int nr_pages; /* Pages actually referenced */
int array_len; /* Space in the allocated lists */
int offset; /* Offset to start of valid data */
int length; /* Number of valid bytes of data */
/* Keep separate track of the physical addresses and page
* structs involved. If we do IO to a memory-mapped device
* region, there won't necessarily be page structs defined for
* every address. */
unsigned long * pagelist;
struct page ** maplist;
unsigned int locked : 1; /* If set, the pages have been locked */
/* Always embed enough struct pages for 64k of IO */
unsigned long page_array[KIO_STATIC_PAGES];
struct page * map_array[KIO_STATIC_PAGES];
/* Dynamic state for IO completion: */
atomic_t io_count; /* IOs still in progress */
int errno; /* Status of completed IO */
void (*end_io) (struct kiobuf *); /* Completion callback */
wait_queue_head_t wait_queue;
};
/* mm/memory.c */
int map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len);
void unmap_kiobuf(struct kiobuf *iobuf);
/* fs/iobuf.c */
void __init kiobuf_init(void);
void simple_wakeup_kiobuf(struct kiobuf *);
int alloc_kiovec(int nr, struct kiobuf **);
void free_kiovec(int nr, struct kiobuf **);
int expand_kiobuf(struct kiobuf *, int);
void kiobuf_wait_for_io(struct kiobuf *);
/* fs/buffer.c */
int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
kdev_t dev, unsigned long b[], int size, int bmap);
#endif /* __LINUX_IOBUF_H */
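To make the intended calling sequence concrete, here is a minimal kernel-side sketch of the kiobuf lifecycle, mirroring how rw_raw_dev() in drivers/char/raw.c above drives this API; the function name and the caller-supplied dev, block list, and user buffer are assumptions for illustration only.
/* Hedged sketch (hypothetical helper): allocate one kiobuf, pin the
   user pages, do the block IO, then tear everything down again. */
#include <linux/fs.h>
#include <linux/iobuf.h>
static int demo_kiobuf_read(kdev_t dev, unsigned long blocks[],
			    char *ubuf, size_t len)
{
	struct kiobuf *iobuf;
	int err;
	err = alloc_kiovec(1, &iobuf);		/* get one kiobuf */
	if (err)
		return err;
	/* pin + lock the user pages; rw==READ means the device will
	   write into user memory, so the pages are faulted writable */
	err = map_user_kiobuf(READ, iobuf, (unsigned long) ubuf, len);
	if (err)
		goto out_free;
	/* 512-byte blocks here; caller must supply enough block numbers */
	err = brw_kiovec(READ, 1, &iobuf, dev, blocks, 512, 0);
	unmap_kiobuf(iobuf);			/* release + unlock the pages */
out_free:
	free_kiovec(1, &iobuf);
	return err;				/* >= 0: bytes transferred */
}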
#ifndef _LINUX_LIMITS_H
#define _LINUX_LIMITS_H
#define NR_OPEN 1024
#define NGROUPS_MAX 32 /* supplemental group IDs are available */
#define ARG_MAX 131072 /* # bytes of args + environ for exec() */
......
......@@ -115,6 +115,8 @@
#define AURORA_MAJOR 79
#define RAW_MAJOR 162
#define UNIX98_PTY_MASTER_MAJOR 128
#define UNIX98_PTY_MAJOR_COUNT 8
#define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT)
......
#ifndef __LINUX_RAW_H
#define __LINUX_RAW_H
#include <linux/types.h>
#define RAW_SETBIND _IO( 0xac, 0 )
#define RAW_GETBIND _IO( 0xac, 1 )
struct raw_config_request
{
int raw_minor;
__u64 block_major;
__u64 block_minor;
};
#ifdef __KERNEL__
/* drivers/char/raw.c */
extern void raw_init(void);
#endif /* __KERNEL__ */
#endif /* __LINUX_RAW_H */
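A user-space sketch of the control protocol defined by this header: bind raw minor 1 to a block device by issuing RAW_SETBIND on the minor-0 control node. The /dev/rawctl and /dev/sda1 node names are assumptions; only the ioctl numbers and struct raw_config_request come from linux/raw.h.
/* Hedged sketch: bind raw minor 1 to the block device backing /dev/sda1.
   Node names are illustrative, not created by this patch. */
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <linux/raw.h>
int main(void)
{
	struct raw_config_request rq;
	struct stat st;
	int ctl = open("/dev/rawctl", O_RDWR);	/* raw minor 0: control device */
	if (ctl < 0 || stat("/dev/sda1", &st) < 0)
		return 1;
	rq.raw_minor   = 1;			/* bind raw minor 1 ... */
	rq.block_major = major(st.st_rdev);	/* ... to this block device */
	rq.block_minor = minor(st.st_rdev);
	if (ioctl(ctl, RAW_SETBIND, &rq) < 0) {	/* kernel checks major/minor */
		perror("RAW_SETBIND");
		return 1;
	}
	return 0;
}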
......@@ -127,6 +127,12 @@ extern void trap_init(void);
extern signed long FASTCALL(schedule_timeout(signed long timeout));
asmlinkage void schedule(void);
/*
* The default fd array needs to be at least BITS_PER_LONG,
* as this is the granularity returned by copy_fdset().
*/
#define NR_OPEN_DEFAULT BITS_PER_LONG
/*
* Open file table structure
*/
......@@ -134,18 +140,28 @@ struct files_struct {
atomic_t count;
rwlock_t file_lock;
int max_fds;
int max_fdset;
int next_fd;
struct file ** fd; /* current fd array */
fd_set close_on_exec;
fd_set open_fds;
fd_set *close_on_exec;
fd_set *open_fds;
fd_set close_on_exec_init;
fd_set open_fds_init;
struct file * fd_array[NR_OPEN_DEFAULT];
};
#define INIT_FILES { \
ATOMIC_INIT(1), \
RW_LOCK_UNLOCKED, \
NR_OPEN, \
&init_fd_array[0], \
NR_OPEN_DEFAULT, \
__FD_SETSIZE, \
0, \
&init_files.fd_array[0], \
&init_files.close_on_exec_init, \
&init_files.open_fds_init, \
{ { 0, } }, \
{ { 0, } } \
{ { 0, } }, \
{ NULL, } \
}
struct fs_struct {
......@@ -633,6 +649,48 @@ extern void mmput(struct mm_struct *);
/* Remove the current tasks stale references to the old mm_struct */
extern void mm_release(void);
/*
* Routines for handling the fd arrays
*/
extern struct file ** alloc_fd_array(int);
extern int expand_fd_array(struct files_struct *, int nr);
extern void free_fd_array(struct file **, int);
extern fd_set *alloc_fdset(int);
extern int expand_fdset(struct files_struct *, int nr);
extern void free_fdset(fd_set *, int);
/* Expand files. Return <0 on error; 0 nothing done; 1 files expanded,
* we may have blocked.
*
* Should be called with the files->file_lock spinlock held for write.
*/
static inline int expand_files(struct files_struct *files, int nr)
{
int err, expand = 0;
#ifdef FDSET_DEBUG
printk (KERN_ERR __FUNCTION__ " %d: nr = %d\n", current->pid, nr);
#endif
if (nr >= files->max_fdset) {
expand = 1;
if ((err = expand_fdset(files, nr)))
goto out;
}
if (nr >= files->max_fds) {
expand = 1;
if ((err = expand_fd_array(files, nr)))
goto out;
}
err = expand;
out:
#ifdef FDSET_DEBUG
if (err)
printk (KERN_ERR __FUNCTION__ " %d: return %d\n", current->pid, err);
#endif
return err;
}
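For illustration, a hedged sketch of the caller contract stated above, the same pattern locate_fd() in fs/fcntl.c uses: call with the file_lock write-held, and retry after a return of 1 because the expansion may have dropped the lock and blocked. The function name is hypothetical.
/* Hedged sketch (hypothetical helper): reserve fd in the bitmap once
   expand_files() guarantees both arrays are big enough for it. */
static inline int reserve_fd_example(struct files_struct *files, int fd)
{
	int err;
repeat:
	err = expand_files(files, fd);
	if (err < 0)
		return err;		/* -EMFILE or -ENOMEM */
	if (err)
		goto repeat;		/* expanded: we may have blocked, so
					   re-run the checks from the top */
	FD_SET(fd, files->open_fds);	/* fd now fits both arrays */
	return fd;
}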
extern int copy_thread(int, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
extern void flush_thread(void);
extern void exit_thread(void);
......
......@@ -23,6 +23,7 @@
#include <linux/smp_lock.h>
#include <linux/blk.h>
#include <linux/hdreg.h>
#include <linux/iobuf.h>
#include <asm/io.h>
#include <asm/bugs.h>
......@@ -1193,6 +1194,7 @@ asmlinkage void __init start_kernel(void)
vma_init();
buffer_init(memory_end-memory_start);
page_cache_init(memory_end-memory_start);
kiobuf_init();
signals_init();
inode_init();
file_table_init();
......
......@@ -149,11 +149,11 @@ static inline void close_files(struct files_struct * files)
j = 0;
for (;;) {
unsigned long set = files->open_fds.fds_bits[j];
unsigned long set;
i = j * __NFDBITS;
j++;
if (i >= files->max_fds)
if (i >= files->max_fdset || i >= files->max_fds)
break;
set = files->open_fds->fds_bits[j++];
while (set) {
if (set & 1) {
struct file * file = xchg(&files->fd[i], NULL);
......@@ -176,12 +176,14 @@ static inline void __exit_files(struct task_struct *tsk)
if (atomic_dec_and_test(&files->count)) {
close_files(files);
/*
* Free the fd array as appropriate ...
* Free the fd and fdset arrays if we expanded them.
*/
if (NR_OPEN * sizeof(struct file *) == PAGE_SIZE)
free_page((unsigned long) files->fd);
else
kfree(files->fd);
if (files->fd != &files->fd_array[0])
free_fd_array(files->fd, files->max_fds);
if (files->max_fdset > __FD_SETSIZE) {
free_fdset(files->open_fds, files->max_fdset);
free_fdset(files->close_on_exec, files->max_fdset);
}
kmem_cache_free(files_cachep, files);
}
}
......
......@@ -433,32 +433,24 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
return 0;
}
/*
* Copy a fd_set and compute the maximum fd it contains.
*/
static inline int __copy_fdset(unsigned long *d, unsigned long *src)
static int count_open_files(struct files_struct *files, int size)
{
int i;
unsigned long *p = src;
unsigned long *max = src;
for (i = __FDSET_LONGS; i; --i) {
if ((*d++ = *p++) != 0)
max = p;
int i;
/* Find the last open fd */
for (i = size/(8*sizeof(long)); i > 0; ) {
if (files->open_fds->fds_bits[--i])
break;
}
return (max - src)*sizeof(long)*8;
}
static inline int copy_fdset(fd_set *dst, fd_set *src)
{
return __copy_fdset(dst->fds_bits, src->fds_bits);
i = (i+1) * 8 * sizeof(long);
return i;
}
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
struct files_struct *oldf, *newf;
struct file **old_fds, **new_fds;
int size, i, error = 0;
int open_files, nfds, size, i, error = 0;
/*
* A background process may not have any files ...
......@@ -478,43 +470,85 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
if (!newf)
goto out;
/*
* Allocate the fd array, using get_free_page() if possible.
* Eventually we want to make the array size variable ...
*/
size = NR_OPEN * sizeof(struct file *);
if (size == PAGE_SIZE)
new_fds = (struct file **) __get_free_page(GFP_KERNEL);
else
new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
if (!new_fds)
goto out_release;
newf->file_lock = RW_LOCK_UNLOCKED;
atomic_set(&newf->count, 1);
newf->max_fds = NR_OPEN;
newf->fd = new_fds;
newf->file_lock = RW_LOCK_UNLOCKED;
newf->next_fd = 0;
newf->max_fds = NR_OPEN_DEFAULT;
newf->max_fdset = __FD_SETSIZE;
newf->close_on_exec = &newf->close_on_exec_init;
newf->open_fds = &newf->open_fds_init;
newf->fd = &newf->fd_array[0];
/* We don't yet have the oldf readlock, but even if the old
fdset gets grown now, we'll only copy up to "size" fds */
size = oldf->max_fdset;
if (size > __FD_SETSIZE) {
newf->max_fdset = 0;
write_lock(&newf->file_lock);
error = expand_fdset(newf, size);
write_unlock(&newf->file_lock);
if (error)
goto out_release;
}
read_lock(&oldf->file_lock);
newf->close_on_exec = oldf->close_on_exec;
i = copy_fdset(&newf->open_fds, &oldf->open_fds);
open_files = count_open_files(oldf, size);
/*
* Check whether we need to allocate a larger fd array.
* Note: we're not a clone task, so the open count won't
* change.
*/
nfds = NR_OPEN_DEFAULT;
if (open_files > nfds) {
read_unlock(&oldf->file_lock);
newf->max_fds = 0;
write_lock(&newf->file_lock);
error = expand_fd_array(newf, open_files);
write_unlock(&newf->file_lock);
if (error)
goto out_release;
nfds = newf->max_fds;
read_lock(&oldf->file_lock);
}
old_fds = oldf->fd;
for (; i != 0; i--) {
new_fds = newf->fd;
memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);
for (i = open_files; i != 0; i--) {
struct file *f = *old_fds++;
if (f)
get_file(f);
*new_fds++ = f;
}
read_unlock(&oldf->file_lock);
/* compute the remainder to be cleared */
size = (newf->max_fds - open_files) * sizeof(struct file *);
/* This is long word aligned thus could use a optimized version */
memset(new_fds, 0, (char *)newf->fd + size - (char *)new_fds);
memset(new_fds, 0, size);
if (newf->max_fdset > open_files) {
int left = (newf->max_fdset-open_files)/8;
int start = open_files / (8 * sizeof(unsigned long));
memset(&newf->open_fds->fds_bits[start], 0, left);
memset(&newf->close_on_exec->fds_bits[start], 0, left);
}
tsk->files = newf;
error = 0;
out:
return error;
out_release:
free_fdset (newf->close_on_exec, newf->max_fdset);
free_fdset (newf->open_fds, newf->max_fdset);
kmem_cache_free(files_cachep, newf);
goto out;
}
......
......@@ -39,6 +39,7 @@
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/swapctl.h>
#include <linux/iobuf.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
......@@ -406,6 +407,181 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s
}
}
/*
* Do a quick page-table lookup for a single page.
*/
static unsigned long follow_page(unsigned long address)
{
pgd_t *pgd;
pmd_t *pmd;
pgd = pgd_offset(current->mm, address);
pmd = pmd_offset(pgd, address);
if (pmd) {
pte_t * pte = pte_offset(pmd, address);
if (pte && pte_present(*pte)) {
return pte_page(*pte);
}
}
printk(KERN_ERR "Missing page in follow_page\n");
return 0;
}
/*
* Given a physical address, is there a useful struct page pointing to it?
*/
static struct page * get_page_map(unsigned long page)
{
struct page *map;
if (MAP_NR(page) >= max_mapnr)
return 0;
if (page == ZERO_PAGE(page))
return 0;
map = mem_map + MAP_NR(page);
if (PageReserved(map))
return 0;
return map;
}
/*
* Force in an entire range of pages from the current process's user VA,
* and pin and lock the pages for IO.
*/
#define dprintk(x...)
int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
{
unsigned long ptr, end;
int err;
struct mm_struct * mm;
struct vm_area_struct * vma = 0;
unsigned long page;
struct page * map;
int doublepage = 0;
int repeat = 0;
int i;
/* Make sure the iobuf is not already mapped somewhere. */
if (iobuf->nr_pages)
return -EINVAL;
mm = current->mm;
dprintk ("map_user_kiobuf: begin\n");
ptr = va & PAGE_MASK;
end = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT);
if (err)
return err;
repeat:
down(&mm->mmap_sem);
err = -EFAULT;
iobuf->locked = 1;
iobuf->offset = va & ~PAGE_MASK;
iobuf->length = len;
i = 0;
/*
* First of all, try to fault in all of the necessary pages
*/
while (ptr < end) {
if (!vma || ptr >= vma->vm_end) {
vma = find_vma(current->mm, ptr);
if (!vma)
goto out_unlock;
}
if (!handle_mm_fault(current, vma, ptr, (rw==READ)))
goto out_unlock;
page = follow_page(ptr);
if (!page) {
printk (KERN_ERR "Missing page in map_user_kiobuf\n");
goto out_unlock;
}
map = get_page_map(page);
if (map) {
if (TryLockPage(map))
goto retry;
atomic_inc(&map->count);
}
dprintk ("Installing page %p %p: %d\n", (void *)page, map, i);
iobuf->pagelist[i] = page;
iobuf->maplist[i] = map;
iobuf->nr_pages = ++i;
ptr += PAGE_SIZE;
}
up(&mm->mmap_sem);
dprintk ("map_user_kiobuf: end OK\n");
return 0;
out_unlock:
up(&mm->mmap_sem);
unmap_kiobuf(iobuf);
dprintk ("map_user_kiobuf: end %d\n", err);
return err;
retry:
/*
* Undo the locking so far, wait on the page we got to, and try again.
*/
unmap_kiobuf(iobuf);
up(&mm->mmap_sem);
/*
* Did the release also unlock the page we got stuck on?
*/
if (!PageLocked(map)) {
/* If so, we may well have the page mapped twice in the
* IO address range. Bad news. Of course, it _might_
* just be a coincidence, but if it happens more than
* once, chances are we have a double-mapped page. */
if (++doublepage >= 3) {
return -EINVAL;
}
}
/*
* Try again...
*/
wait_on_page(map);
if (++repeat < 16)
goto repeat;
return -EAGAIN;
}
/*
* Unmap all of the pages referenced by a kiobuf. We release the pages,
* and unlock them if they were locked.
*/
void unmap_kiobuf (struct kiobuf *iobuf)
{
int i;
struct page *map;
for (i = 0; i < iobuf->nr_pages; i++) {
map = iobuf->maplist[i];
if (map && iobuf->locked) {
__free_page(map);
UnlockPage(map);
}
}
iobuf->nr_pages = 0;
iobuf->locked = 0;
}
static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
unsigned long size, pgprot_t prot)
{
......