Commit 2f86142b authored by Dave Kleikamp's avatar Dave Kleikamp

JFS: Avoid parallel allocations within the same allocation group

When large files are writting in parallel, allocating the space for
these files within the same allocation group can cause severe
fragmentation of the files.  By keeping track of open, growing files
within an allocation group, we can force other new allocations into
a different allocation group to avoid this.
parent d1cd8c07
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/fs.h> #include <linux/fs.h>
#include "jfs_incore.h" #include "jfs_incore.h"
#include "jfs_dmap.h"
#include "jfs_txnmgr.h" #include "jfs_txnmgr.h"
#include "jfs_xattr.h" #include "jfs_xattr.h"
#include "jfs_debug.h" #include "jfs_debug.h"
...@@ -94,6 +95,42 @@ static void jfs_truncate(struct inode *ip) ...@@ -94,6 +95,42 @@ static void jfs_truncate(struct inode *ip)
IWRITE_UNLOCK(ip); IWRITE_UNLOCK(ip);
} }
static int jfs_open(struct inode *inode, struct file *file)
{
int rc;
if ((rc = generic_file_open(inode, file)))
return rc;
/*
* We attempt to allow only one "active" file open per aggregate
* group. Otherwise, appending to files in parallel can cause
* fragmentation within the files.
*/
if (S_ISREG(inode->i_mode) && file->f_mode & FMODE_WRITE) {
struct jfs_inode_info *ji = JFS_IP(inode);
if (ji->active_ag == -1) {
ji->active_ag = ji->agno;
atomic_inc(
&JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]);
}
}
return 0;
}
static int jfs_release(struct inode *inode, struct file *file)
{
struct jfs_inode_info *ji = JFS_IP(inode);
if (ji->active_ag != -1) {
struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
atomic_dec(&bmap->db_active[ji->active_ag]);
ji->active_ag = -1;
}
return 0;
}
struct inode_operations jfs_file_inode_operations = { struct inode_operations jfs_file_inode_operations = {
.truncate = jfs_truncate, .truncate = jfs_truncate,
.setxattr = jfs_setxattr, .setxattr = jfs_setxattr,
...@@ -103,7 +140,7 @@ struct inode_operations jfs_file_inode_operations = { ...@@ -103,7 +140,7 @@ struct inode_operations jfs_file_inode_operations = {
}; };
struct file_operations jfs_file_operations = { struct file_operations jfs_file_operations = {
.open = generic_file_open, .open = jfs_open,
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
.write = generic_file_write, .write = generic_file_write,
.read = generic_file_read, .read = generic_file_read,
...@@ -112,4 +149,5 @@ struct file_operations jfs_file_operations = { ...@@ -112,4 +149,5 @@ struct file_operations jfs_file_operations = {
.writev = generic_file_writev, .writev = generic_file_writev,
.sendfile = generic_file_sendfile, .sendfile = generic_file_sendfile,
.fsync = jfs_fsync, .fsync = jfs_fsync,
.release = jfs_release,
}; };
This diff is collapsed.
...@@ -227,6 +227,7 @@ struct bmap { ...@@ -227,6 +227,7 @@ struct bmap {
struct dbmap db_bmap; /* on-disk aggregate map descriptor */ struct dbmap db_bmap; /* on-disk aggregate map descriptor */
struct inode *db_ipbmap; /* ptr to aggregate map incore inode */ struct inode *db_ipbmap; /* ptr to aggregate map incore inode */
struct semaphore db_bmaplock; /* aggregate map lock */ struct semaphore db_bmaplock; /* aggregate map lock */
atomic_t db_active[MAXAG]; /* count of active, open files in AG */
u32 *db_DBmap; u32 *db_DBmap;
}; };
......
...@@ -514,9 +514,12 @@ int extFill(struct inode *ip, xad_t * xp) ...@@ -514,9 +514,12 @@ int extFill(struct inode *ip, xad_t * xp)
static int static int
extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
{ {
struct jfs_inode_info *ji = JFS_IP(ip);
struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
s64 nb, nblks, daddr, max; s64 nb, nblks, daddr, max;
int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; int rc, nbperpage = sbi->nbperpage;
struct bmap *mp = JFS_SBI(ip->i_sb)->bmap; struct bmap *bmp = sbi->bmap;
int ag;
/* get the number of blocks to initially attempt to allocate. /* get the number of blocks to initially attempt to allocate.
* we'll first try the number of blocks requested unless this * we'll first try the number of blocks requested unless this
...@@ -524,7 +527,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) ...@@ -524,7 +527,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
* blocks in the map. in that case, we'll start off with the * blocks in the map. in that case, we'll start off with the
* maximum free. * maximum free.
*/ */
max = (s64) 1 << mp->db_maxfreebud; max = (s64) 1 << bmp->db_maxfreebud;
if (*nblocks >= max && *nblocks > nbperpage) if (*nblocks >= max && *nblocks > nbperpage)
nb = nblks = (max > nbperpage) ? max : nbperpage; nb = nblks = (max > nbperpage) ? max : nbperpage;
else else
...@@ -549,6 +552,18 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) ...@@ -549,6 +552,18 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
*nblocks = nb; *nblocks = nb;
*blkno = daddr; *blkno = daddr;
if (S_ISREG(ip->i_mode) && (ji->fileset == FILESYSTEM_I)) {
ag = BLKTOAG(daddr, sbi);
if (ji->active_ag == -1) {
atomic_inc(&bmp->db_active[ag]);
ji->active_ag = ag;
} else if (ji->active_ag != ag) {
atomic_dec(&bmp->db_active[ji->active_ag]);
atomic_inc(&bmp->db_active[ag]);
ji->active_ag = ag;
}
}
return (0); return (0);
} }
......
...@@ -429,6 +429,7 @@ int diRead(struct inode *ip) ...@@ -429,6 +429,7 @@ int diRead(struct inode *ip)
/* set the ag for the inode */ /* set the ag for the inode */
JFS_IP(ip)->agno = BLKTOAG(agstart, sbi); JFS_IP(ip)->agno = BLKTOAG(agstart, sbi);
JFS_IP(ip)->active_ag = -1;
return (rc); return (rc);
} }
...@@ -1358,6 +1359,7 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) ...@@ -1358,6 +1359,7 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
DBG_DIALLOC(JFS_IP(ipimap)->i_imap, ip->i_ino); DBG_DIALLOC(JFS_IP(ipimap)->i_imap, ip->i_ino);
jfs_ip->ixpxd = iagp->inoext[extno]; jfs_ip->ixpxd = iagp->inoext[extno];
jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
jfs_ip->active_ag = -1;
} }
...@@ -1413,6 +1415,21 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) ...@@ -1413,6 +1415,21 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip)
* moving backward on the disk.) compute the hint within the * moving backward on the disk.) compute the hint within the
* file system and the iag. * file system and the iag.
*/ */
/* get the ag number of this iag */
agno = JFS_IP(pip)->agno;
if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
/*
* There is an open file actively growing. We want to
* allocate new inodes from a different ag to avoid
* fragmentation problems.
*/
agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
AG_LOCK(imap, agno);
goto tryag;
}
inum = pip->i_ino + 1; inum = pip->i_ino + 1;
ino = inum & (INOSPERIAG - 1); ino = inum & (INOSPERIAG - 1);
...@@ -1420,9 +1437,6 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) ...@@ -1420,9 +1437,6 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip)
if (ino == 0) if (ino == 0)
inum = pip->i_ino; inum = pip->i_ino;
/* get the ag number of this iag */
agno = JFS_IP(pip)->agno;
/* lock the AG inode map information */ /* lock the AG inode map information */
AG_LOCK(imap, agno); AG_LOCK(imap, agno);
......
...@@ -49,7 +49,7 @@ struct jfs_inode_info { ...@@ -49,7 +49,7 @@ struct jfs_inode_info {
long cflag; /* commit flags */ long cflag; /* commit flags */
u16 bxflag; /* xflag of pseudo buffer? */ u16 bxflag; /* xflag of pseudo buffer? */
unchar agno; /* ag number */ unchar agno; /* ag number */
unchar pad; /* pad */ signed char active_ag; /* ag currently allocating from */
lid_t blid; /* lid of pseudo buffer? */ lid_t blid; /* lid of pseudo buffer? */
lid_t atlhead; /* anonymous tlock list head */ lid_t atlhead; /* anonymous tlock list head */
lid_t atltail; /* anonymous tlock list tail */ lid_t atltail; /* anonymous tlock list tail */
......
...@@ -406,6 +406,7 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) ...@@ -406,6 +406,7 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
init_rwsem(&jfs_ip->rdwrlock); init_rwsem(&jfs_ip->rdwrlock);
init_MUTEX(&jfs_ip->commit_sem); init_MUTEX(&jfs_ip->commit_sem);
jfs_ip->atlhead = 0; jfs_ip->atlhead = 0;
jfs_ip->active_ag = -1;
inode_init_once(&jfs_ip->vfs_inode); inode_init_once(&jfs_ip->vfs_inode);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment