Commit 4be68d9c authored by Theodore Y. Ts'o, committed by Linus Torvalds

[PATCH] Fixup Orlov block allocator for ext2

I finally had time to look at the Orlov patches, and found a memory
leak; sbi->s_debts wasn't getting freed when the filesystem was
getting unmounted, or in the error path.

This patch also makes the following cleanups/changes:

1) Use sbi->s_debts instead of sbi->debts --- all other fields in
	struct ext2_sb_info are prefixed by "s_", so this makes things
	consistent.

2) Add support for a new inode flag, EXT2_TOPDIR_FL, which tells
	the Orlov allocator to treat that directory as the top of
	directory hierarchies, so that new subdirectories created in
	that directory should be spread apart.  System administrators
	should set this flag on directories like /usr/src, /usr/home, etc.

3) Add a mount-time flag, -o oldalloc, which forces the use of the old
	(pre-Orlov) inode allocator.  This makes it easier to run
	comparison benchmarks, and keeps the old algorithm available for
	anyone who prefers it.
parent eb74a93e
...@@ -209,9 +209,7 @@ static void ext2_preread_inode(struct inode *inode) ...@@ -209,9 +209,7 @@ static void ext2_preread_inode(struct inode *inode)
* For other inodes, search forward from the parent directory\'s block * For other inodes, search forward from the parent directory\'s block
* group to find a free inode. * group to find a free inode.
*/ */
#if 0 static int find_group_dir(struct super_block *sb, struct inode *parent)
static int find_group_dir(struct super_block *sb, int parent_group)
{ {
struct ext2_super_block * es = EXT2_SB(sb)->s_es; struct ext2_super_block * es = EXT2_SB(sb)->s_es;
int ngroups = EXT2_SB(sb)->s_groups_count; int ngroups = EXT2_SB(sb)->s_groups_count;
...@@ -243,7 +241,6 @@ static int find_group_dir(struct super_block *sb, int parent_group) ...@@ -243,7 +241,6 @@ static int find_group_dir(struct super_block *sb, int parent_group)
mark_buffer_dirty(best_bh); mark_buffer_dirty(best_bh);
return best_group; return best_group;
} }
#endif
/* /*
* Orlov's allocator for directories. * Orlov's allocator for directories.
...@@ -289,7 +286,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) ...@@ -289,7 +286,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
struct ext2_group_desc *desc; struct ext2_group_desc *desc;
struct buffer_head *bh; struct buffer_head *bh;
if (parent == sb->s_root->d_inode) { if ((parent == sb->s_root->d_inode) ||
(parent->i_flags & EXT2_TOPDIR_FL)) {
struct ext2_group_desc *best_desc = NULL; struct ext2_group_desc *best_desc = NULL;
struct buffer_head *best_bh = NULL; struct buffer_head *best_bh = NULL;
int best_ndir = inodes_per_group; int best_ndir = inodes_per_group;
...@@ -342,7 +340,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) ...@@ -342,7 +340,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
desc = ext2_get_group_desc (sb, group, &bh); desc = ext2_get_group_desc (sb, group, &bh);
if (!desc || !desc->bg_free_inodes_count) if (!desc || !desc->bg_free_inodes_count)
continue; continue;
if (sbi->debts[group] >= max_debt) if (sbi->s_debts[group] >= max_debt)
continue; continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
continue; continue;
...@@ -447,9 +445,12 @@ struct inode * ext2_new_inode(struct inode * dir, int mode) ...@@ -447,9 +445,12 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
lock_super (sb); lock_super (sb);
es = EXT2_SB(sb)->s_es; es = EXT2_SB(sb)->s_es;
repeat: repeat:
if (S_ISDIR(mode)) if (S_ISDIR(mode)) {
group = find_group_orlov(sb, dir); if (test_opt (sb, OLDALLOC))
group = find_group_dir(sb, dir);
else else
group = find_group_orlov(sb, dir);
} else
group = find_group_other(sb, dir); group = find_group_other(sb, dir);
err = -ENOSPC; err = -ENOSPC;
...@@ -488,11 +489,11 @@ struct inode * ext2_new_inode(struct inode * dir, int mode) ...@@ -488,11 +489,11 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1); cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
if (S_ISDIR(mode)) { if (S_ISDIR(mode)) {
if (EXT2_SB(sb)->debts[group] < 255) if (EXT2_SB(sb)->s_debts[group] < 255)
EXT2_SB(sb)->debts[group]++; EXT2_SB(sb)->s_debts[group]++;
} else { } else {
if (EXT2_SB(sb)->debts[group]) if (EXT2_SB(sb)->s_debts[group])
EXT2_SB(sb)->debts[group]--; EXT2_SB(sb)->s_debts[group]--;
} }
mark_buffer_dirty(EXT2_SB(sb)->s_sbh); mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
......
...@@ -140,6 +140,7 @@ static void ext2_put_super (struct super_block * sb) ...@@ -140,6 +140,7 @@ static void ext2_put_super (struct super_block * sb)
if (sbi->s_group_desc[i]) if (sbi->s_group_desc[i])
brelse (sbi->s_group_desc[i]); brelse (sbi->s_group_desc[i]);
kfree(sbi->s_group_desc); kfree(sbi->s_group_desc);
kfree(sbi->s_debts);
brelse (sbi->s_sbh); brelse (sbi->s_sbh);
sb->s_fs_info = NULL; sb->s_fs_info = NULL;
kfree(sbi); kfree(sbi);
...@@ -385,6 +386,10 @@ static int parse_options (char * options, ...@@ -385,6 +386,10 @@ static int parse_options (char * options,
return 0; return 0;
sbi->s_resuid = v; sbi->s_resuid = v;
} }
else if (!strcmp (this_char, "oldalloc"))
set_opt (sbi->s_mount_opt, OLDALLOC);
else if (!strcmp (this_char, "orlov"))
clear_opt (sbi->s_mount_opt, OLDALLOC);
/* Silently ignore the quota options */ /* Silently ignore the quota options */
else if (!strcmp (this_char, "grpquota") else if (!strcmp (this_char, "grpquota")
|| !strcmp (this_char, "noquota") || !strcmp (this_char, "noquota")
...@@ -756,13 +761,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ...@@ -756,13 +761,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
printk ("EXT2-fs: not enough memory\n"); printk ("EXT2-fs: not enough memory\n");
goto failed_mount; goto failed_mount;
} }
sbi->debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->debts), sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
GFP_KERNEL); GFP_KERNEL);
if (!sbi->debts) { if (!sbi->s_debts) {
printk ("EXT2-fs: not enough memory\n"); printk ("EXT2-fs: not enough memory\n");
goto failed_mount_group_desc; goto failed_mount_group_desc;
} }
memset(sbi->debts, 0, sbi->s_groups_count * sizeof(*sbi->debts)); memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts));
for (i = 0; i < db_count; i++) { for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logic_sb_block, i); block = descriptor_loc(sb, logic_sb_block, i);
sbi->s_group_desc[i] = sb_bread(sb, block); sbi->s_group_desc[i] = sb_bread(sb, block);
...@@ -771,7 +776,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ...@@ -771,7 +776,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
brelse (sbi->s_group_desc[j]); brelse (sbi->s_group_desc[j]);
kfree(sbi->s_group_desc); kfree(sbi->s_group_desc);
printk ("EXT2-fs: unable to read group descriptors\n"); printk ("EXT2-fs: unable to read group descriptors\n");
goto failed_mount; goto failed_mount_group_desc;
} }
} }
if (!ext2_check_descriptors (sb)) { if (!ext2_check_descriptors (sb)) {
...@@ -808,6 +813,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ...@@ -808,6 +813,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
brelse(sbi->s_group_desc[i]); brelse(sbi->s_group_desc[i]);
failed_mount_group_desc: failed_mount_group_desc:
kfree(sbi->s_group_desc); kfree(sbi->s_group_desc);
if (sbi->s_debts)
kfree(sbi->s_debts);
failed_mount: failed_mount:
brelse(bh); brelse(bh);
failed_sbi: failed_sbi:
......
...@@ -191,10 +191,11 @@ struct ext2_group_desc ...@@ -191,10 +191,11 @@ struct ext2_group_desc
#define EXT2_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ #define EXT2_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */
#define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ #define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
#define EXT2_FL_USER_VISIBLE 0x00011FFF /* User visible flags */ #define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
#define EXT2_FL_USER_MODIFIABLE 0x000100FF /* User modifiable flags */ #define EXT2_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
/* /*
* ioctl commands * ioctl commands
...@@ -300,6 +301,7 @@ struct ext2_inode { ...@@ -300,6 +301,7 @@ struct ext2_inode {
* Mount flags * Mount flags
*/ */
#define EXT2_MOUNT_CHECK 0x0001 /* Do mount-time checks */ #define EXT2_MOUNT_CHECK 0x0001 /* Do mount-time checks */
#define EXT2_MOUNT_OLDALLOC 0x0002 /* Don't use the new Orlov allocator */
#define EXT2_MOUNT_GRPID 0x0004 /* Create files with directory's group */ #define EXT2_MOUNT_GRPID 0x0004 /* Create files with directory's group */
#define EXT2_MOUNT_DEBUG 0x0008 /* Some debugging messages */ #define EXT2_MOUNT_DEBUG 0x0008 /* Some debugging messages */
#define EXT2_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ #define EXT2_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */
......
...@@ -44,7 +44,7 @@ struct ext2_sb_info { ...@@ -44,7 +44,7 @@ struct ext2_sb_info {
int s_first_ino; int s_first_ino;
u32 s_next_generation; u32 s_next_generation;
unsigned long s_dir_count; unsigned long s_dir_count;
u8 *debts; u8 *s_debts;
}; };
#endif /* _LINUX_EXT2_FS_SB */ #endif /* _LINUX_EXT2_FS_SB */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment