Commit 1fd36adc, authored by David Howells, committed by H. Peter Anvin

Replace the fd_sets in struct fdtable with an array of unsigned longs

Replace the fd_sets in struct fdtable with an array of unsigned longs and then
use the standard non-atomic bit operations rather than the FD_* macros.

This:

 (1) Removes the abuses of struct fd_set:

     (a) Since we don't want to allocate a full fd_set the vast majority of the
     	 time, we actually, in effect, just allocate a just-big-enough array of
     	 unsigned longs and cast it to an fd_set type - so why bother with the
     	 fd_set at all?

     (b) Some places outside of the core fdtable handling code (such as
     	 SELinux) want to look inside the array of unsigned longs hidden inside
     	 the fd_set struct for more efficient iteration over the entire set.

 (2) Eliminates the use of FD_*() macros in the kernel completely.

 (3) Permits the __FD_*() macros to be deleted entirely where not exposed to
     userspace.
Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120216174954.23314.48147.stgit@warthog.procyon.org.uk
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
parent 1dce27c5
...@@ -1026,10 +1026,10 @@ static void flush_old_files(struct files_struct * files) ...@@ -1026,10 +1026,10 @@ static void flush_old_files(struct files_struct * files)
fdt = files_fdtable(files); fdt = files_fdtable(files);
if (i >= fdt->max_fds) if (i >= fdt->max_fds)
break; break;
set = fdt->close_on_exec->fds_bits[j]; set = fdt->close_on_exec[j];
if (!set) if (!set)
continue; continue;
fdt->close_on_exec->fds_bits[j] = 0; fdt->close_on_exec[j] = 0;
spin_unlock(&files->file_lock); spin_unlock(&files->file_lock);
for ( ; set ; i++,set >>= 1) { for ( ; set ; i++,set >>= 1) {
if (set & 1) { if (set & 1) {
......
...@@ -40,7 +40,7 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */ ...@@ -40,7 +40,7 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */
*/ */
static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
static void *alloc_fdmem(unsigned int size) static void *alloc_fdmem(size_t size)
{ {
/* /*
* Very large allocations can stress page reclaim, so fall back to * Very large allocations can stress page reclaim, so fall back to
...@@ -142,7 +142,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) ...@@ -142,7 +142,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
static struct fdtable * alloc_fdtable(unsigned int nr) static struct fdtable * alloc_fdtable(unsigned int nr)
{ {
struct fdtable *fdt; struct fdtable *fdt;
char *data; void *data;
/* /*
* Figure out how many fds we actually want to support in this fdtable. * Figure out how many fds we actually want to support in this fdtable.
...@@ -172,14 +172,15 @@ static struct fdtable * alloc_fdtable(unsigned int nr) ...@@ -172,14 +172,15 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
data = alloc_fdmem(nr * sizeof(struct file *)); data = alloc_fdmem(nr * sizeof(struct file *));
if (!data) if (!data)
goto out_fdt; goto out_fdt;
fdt->fd = (struct file **)data; fdt->fd = data;
data = alloc_fdmem(max_t(unsigned int,
data = alloc_fdmem(max_t(size_t,
2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
if (!data) if (!data)
goto out_arr; goto out_arr;
fdt->open_fds = (fd_set *)data; fdt->open_fds = data;
data += nr / BITS_PER_BYTE; data += nr / BITS_PER_BYTE;
fdt->close_on_exec = (fd_set *)data; fdt->close_on_exec = data;
fdt->next = NULL; fdt->next = NULL;
return fdt; return fdt;
...@@ -275,11 +276,11 @@ static int count_open_files(struct fdtable *fdt) ...@@ -275,11 +276,11 @@ static int count_open_files(struct fdtable *fdt)
int i; int i;
/* Find the last open fd */ /* Find the last open fd */
for (i = size/(8*sizeof(long)); i > 0; ) { for (i = size / BITS_PER_LONG; i > 0; ) {
if (fdt->open_fds->fds_bits[--i]) if (fdt->open_fds[--i])
break; break;
} }
i = (i+1) * 8 * sizeof(long); i = (i + 1) * BITS_PER_LONG;
return i; return i;
} }
...@@ -306,8 +307,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) ...@@ -306,8 +307,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
newf->next_fd = 0; newf->next_fd = 0;
new_fdt = &newf->fdtab; new_fdt = &newf->fdtab;
new_fdt->max_fds = NR_OPEN_DEFAULT; new_fdt->max_fds = NR_OPEN_DEFAULT;
new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; new_fdt->close_on_exec = newf->close_on_exec_init;
new_fdt->open_fds = (fd_set *)&newf->open_fds_init; new_fdt->open_fds = newf->open_fds_init;
new_fdt->fd = &newf->fd_array[0]; new_fdt->fd = &newf->fd_array[0];
new_fdt->next = NULL; new_fdt->next = NULL;
...@@ -350,10 +351,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) ...@@ -350,10 +351,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
old_fds = old_fdt->fd; old_fds = old_fdt->fd;
new_fds = new_fdt->fd; new_fds = new_fdt->fd;
memcpy(new_fdt->open_fds->fds_bits, memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
old_fdt->open_fds->fds_bits, open_files/8); memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
memcpy(new_fdt->close_on_exec->fds_bits,
old_fdt->close_on_exec->fds_bits, open_files/8);
for (i = open_files; i != 0; i--) { for (i = open_files; i != 0; i--) {
struct file *f = *old_fds++; struct file *f = *old_fds++;
...@@ -379,11 +378,11 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) ...@@ -379,11 +378,11 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
memset(new_fds, 0, size); memset(new_fds, 0, size);
if (new_fdt->max_fds > open_files) { if (new_fdt->max_fds > open_files) {
int left = (new_fdt->max_fds-open_files)/8; int left = (new_fdt->max_fds - open_files) / 8;
int start = open_files / (8 * sizeof(unsigned long)); int start = open_files / BITS_PER_LONG;
memset(&new_fdt->open_fds->fds_bits[start], 0, left); memset(&new_fdt->open_fds[start], 0, left);
memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); memset(&new_fdt->close_on_exec[start], 0, left);
} }
rcu_assign_pointer(newf->fdt, new_fdt); rcu_assign_pointer(newf->fdt, new_fdt);
...@@ -419,8 +418,8 @@ struct files_struct init_files = { ...@@ -419,8 +418,8 @@ struct files_struct init_files = {
.fdtab = { .fdtab = {
.max_fds = NR_OPEN_DEFAULT, .max_fds = NR_OPEN_DEFAULT,
.fd = &init_files.fd_array[0], .fd = &init_files.fd_array[0],
.close_on_exec = (fd_set *)&init_files.close_on_exec_init, .close_on_exec = init_files.close_on_exec_init,
.open_fds = (fd_set *)&init_files.open_fds_init, .open_fds = init_files.open_fds_init,
}, },
.file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
}; };
...@@ -443,8 +442,7 @@ int alloc_fd(unsigned start, unsigned flags) ...@@ -443,8 +442,7 @@ int alloc_fd(unsigned start, unsigned flags)
fd = files->next_fd; fd = files->next_fd;
if (fd < fdt->max_fds) if (fd < fdt->max_fds)
fd = find_next_zero_bit(fdt->open_fds->fds_bits, fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
fdt->max_fds, fd);
error = expand_files(files, fd); error = expand_files(files, fd);
if (error < 0) if (error < 0)
......
...@@ -348,7 +348,7 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) ...@@ -348,7 +348,7 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
set = ~(~0UL << (n & (__NFDBITS-1))); set = ~(~0UL << (n & (__NFDBITS-1)));
n /= __NFDBITS; n /= __NFDBITS;
fdt = files_fdtable(current->files); fdt = files_fdtable(current->files);
open_fds = fdt->open_fds->fds_bits+n; open_fds = fdt->open_fds + n;
max = 0; max = 0;
if (set) { if (set) {
set &= BITS(fds, n); set &= BITS(fds, n);
......
...@@ -21,51 +21,43 @@ ...@@ -21,51 +21,43 @@
*/ */
#define NR_OPEN_DEFAULT BITS_PER_LONG #define NR_OPEN_DEFAULT BITS_PER_LONG
/*
* The embedded_fd_set is a small fd_set,
* suitable for most tasks (which open <= BITS_PER_LONG files)
*/
struct embedded_fd_set {
unsigned long fds_bits[1];
};
struct fdtable { struct fdtable {
unsigned int max_fds; unsigned int max_fds;
struct file __rcu **fd; /* current fd array */ struct file __rcu **fd; /* current fd array */
fd_set *close_on_exec; unsigned long *close_on_exec;
fd_set *open_fds; unsigned long *open_fds;
struct rcu_head rcu; struct rcu_head rcu;
struct fdtable *next; struct fdtable *next;
}; };
static inline void __set_close_on_exec(int fd, struct fdtable *fdt) static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
{ {
FD_SET(fd, fdt->close_on_exec); __set_bit(fd, fdt->close_on_exec);
} }
static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
{ {
FD_CLR(fd, fdt->close_on_exec); __clear_bit(fd, fdt->close_on_exec);
} }
static inline bool close_on_exec(int fd, const struct fdtable *fdt) static inline bool close_on_exec(int fd, const struct fdtable *fdt)
{ {
return FD_ISSET(fd, fdt->close_on_exec); return test_bit(fd, fdt->close_on_exec);
} }
static inline void __set_open_fd(int fd, struct fdtable *fdt) static inline void __set_open_fd(int fd, struct fdtable *fdt)
{ {
FD_SET(fd, fdt->open_fds); __set_bit(fd, fdt->open_fds);
} }
static inline void __clear_open_fd(int fd, struct fdtable *fdt) static inline void __clear_open_fd(int fd, struct fdtable *fdt)
{ {
FD_CLR(fd, fdt->open_fds); __clear_bit(fd, fdt->open_fds);
} }
static inline bool fd_is_open(int fd, const struct fdtable *fdt) static inline bool fd_is_open(int fd, const struct fdtable *fdt)
{ {
return FD_ISSET(fd, fdt->open_fds); return test_bit(fd, fdt->open_fds);
} }
/* /*
...@@ -83,8 +75,8 @@ struct files_struct { ...@@ -83,8 +75,8 @@ struct files_struct {
*/ */
spinlock_t file_lock ____cacheline_aligned_in_smp; spinlock_t file_lock ____cacheline_aligned_in_smp;
int next_fd; int next_fd;
struct embedded_fd_set close_on_exec_init; unsigned long close_on_exec_init[1];
struct embedded_fd_set open_fds_init; unsigned long open_fds_init[1];
struct file __rcu * fd_array[NR_OPEN_DEFAULT]; struct file __rcu * fd_array[NR_OPEN_DEFAULT];
}; };
......
...@@ -473,7 +473,7 @@ static void close_files(struct files_struct * files) ...@@ -473,7 +473,7 @@ static void close_files(struct files_struct * files)
i = j * __NFDBITS; i = j * __NFDBITS;
if (i >= fdt->max_fds) if (i >= fdt->max_fds)
break; break;
set = fdt->open_fds->fds_bits[j++]; set = fdt->open_fds[j++];
while (set) { while (set) {
if (set & 1) { if (set & 1) {
struct file * file = xchg(&fdt->fd[i], NULL); struct file * file = xchg(&fdt->fd[i], NULL);
......
...@@ -2145,7 +2145,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, ...@@ -2145,7 +2145,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
fdt = files_fdtable(files); fdt = files_fdtable(files);
if (i >= fdt->max_fds) if (i >= fdt->max_fds)
break; break;
set = fdt->open_fds->fds_bits[j]; set = fdt->open_fds[j];
if (!set) if (!set)
continue; continue;
spin_unlock(&files->file_lock); spin_unlock(&files->file_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment