Commit 03844e4b, authored by Hugh Dickins, committed by Linus Torvalds

[PATCH] shmem: avoid metadata leakiness

akpm and wli each discovered unfortunate behaviour of dbench on tmpfs:
after tmpfs has reached its data memory limit, dbench continues to
lseek and write, and tmpfs carries on allocating unlimited metadata
blocks to accommodate the data it then refuses.  That particular
behaviour could be simply fixed by checking earlier; but I think tmpfs
metablocks should be subject to the memory limit, and included in df
and du accounting.  Also, manipulate inode->i_blocks under the lock,
which was missed before.
parent 7aa8800b
......@@ -14,7 +14,8 @@ struct shmem_inode_info {
unsigned long next_index;
swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
void **i_indirect; /* indirect blocks */
unsigned long swapped;
unsigned long alloced; /* data pages allocated to file */
unsigned long swapped; /* subtotal assigned to swap */
unsigned long flags;
struct list_head list;
struct inode vfs_inode;
......
......@@ -68,38 +68,42 @@ LIST_HEAD (shmem_inodes);
static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
atomic_t shmem_nrpages = ATOMIC_INIT(0); /* Not used right now */
/*
 * shmem_free_block - give one block back to the filesystem
 *
 * @inode: inode the block had been charged to
 *
 * Increments the superblock's free_blocks count and takes one page's
 * worth of blocks off inode->i_blocks.  Both updates are made while
 * holding sbinfo->stat_lock.
 */
static void shmem_free_block(struct inode *inode)
{
	struct shmem_sb_info *sbi = SHMEM_SB(inode->i_sb);

	spin_lock(&sbi->stat_lock);
	inode->i_blocks -= BLOCKS_PER_PAGE;
	sbi->free_blocks++;
	spin_unlock(&sbi->stat_lock);
}
/*
* shmem_recalc_inode - recalculate the size of an inode
*
* @inode: inode to recalc
* @swap: additional swap pages freed externally
*
* We have to calculate the free blocks since the mm can drop pages
* behind our back
* We have to calculate the free blocks since the mm can drop
* undirtied hole pages behind our back. Later we should be
* able to use the releasepage method to handle this better.
*
* But we know that normally
* inodes->i_blocks/BLOCKS_PER_PAGE ==
* inode->i_mapping->nrpages + info->swapped
*
* So the mm freed
* inodes->i_blocks/BLOCKS_PER_PAGE -
* (inode->i_mapping->nrpages + info->swapped)
* But normally info->alloced == inode->i_mapping->nrpages + info->swapped
* So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
*
* It has to be called with the spinlock held.
*/
static void shmem_recalc_inode(struct inode * inode)
{
unsigned long freed;
struct shmem_inode_info *info = SHMEM_I(inode);
long freed;
freed = (inode->i_blocks/BLOCKS_PER_PAGE) -
(inode->i_mapping->nrpages + SHMEM_I(inode)->swapped);
if (freed){
struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
inode->i_blocks -= freed*BLOCKS_PER_PAGE;
spin_lock (&sbinfo->stat_lock);
freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
if (freed > 0) {
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
info->alloced -= freed;
spin_lock(&sbinfo->stat_lock);
sbinfo->free_blocks += freed;
spin_unlock (&sbinfo->stat_lock);
inode->i_blocks -= freed*BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock);
}
}
......@@ -196,6 +200,8 @@ static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long
*/
static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index)
{
struct inode *inode = &info->vfs_inode;
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
unsigned long page = 0;
swp_entry_t *entry;
......@@ -204,14 +210,33 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
entry = ERR_PTR(-EFAULT);
break;
}
/*
* Test free_blocks against 1 not 0, since we have 1 data
* page (and perhaps indirect index pages) yet to allocate:
* a waste to allocate index if we cannot allocate data.
*/
spin_lock(&sbinfo->stat_lock);
if (sbinfo->free_blocks <= 1) {
spin_unlock(&sbinfo->stat_lock);
return ERR_PTR(-ENOSPC);
}
sbinfo->free_blocks--;
inode->i_blocks += BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock);
spin_unlock(&info->lock);
page = get_zeroed_page(GFP_USER);
spin_lock(&info->lock);
if (!page)
if (!page) {
shmem_free_block(inode);
return ERR_PTR(-ENOMEM);
}
}
if (page) {
/* another task gave its page, or truncated the file */
shmem_free_block(inode);
free_page(page);
}
return entry;
......@@ -243,41 +268,42 @@ static int shmem_free_swp(swp_entry_t *dir, unsigned int count)
* shmem_truncate_direct - free the swap entries of a whole doubly
* indirect block
*
* @info: the info structure of the inode
* @dir: pointer to the pointer to the block
* @start: offset to start from (in pages)
* @len: how many pages are stored in this block
*
* Returns the number of freed swap entries.
*/
static inline unsigned long
shmem_truncate_direct(swp_entry_t *** dir, unsigned long start, unsigned long len) {
static inline unsigned long
shmem_truncate_direct(struct shmem_inode_info *info, swp_entry_t ***dir, unsigned long start, unsigned long len)
{
swp_entry_t **last, **ptr;
unsigned long off, freed = 0;
if (!*dir)
return 0;
unsigned long off, freed_swp, freed = 0;
last = *dir + (len + ENTRIES_PER_PAGE-1) / ENTRIES_PER_PAGE;
off = start % ENTRIES_PER_PAGE;
for (ptr = *dir + start/ENTRIES_PER_PAGE; ptr < last; ptr++) {
if (!*ptr) {
off = 0;
for (ptr = *dir + start/ENTRIES_PER_PAGE; ptr < last; ptr++, off = 0) {
if (!*ptr)
continue;
if (info->swapped) {
freed_swp = shmem_free_swp(*ptr + off,
ENTRIES_PER_PAGE - off);
info->swapped -= freed_swp;
freed += freed_swp;
}
if (!off) {
freed += shmem_free_swp(*ptr, ENTRIES_PER_PAGE);
free_page ((unsigned long) *ptr);
info->alloced++;
free_page((unsigned long) *ptr);
*ptr = 0;
} else {
freed += shmem_free_swp(*ptr+off,ENTRIES_PER_PAGE-off);
off = 0;
}
}
if (!start) {
info->alloced++;
free_page((unsigned long) *dir);
*dir = 0;
}
......@@ -299,11 +325,16 @@ shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index)
swp_entry_t ***base;
unsigned long baseidx, len, start;
unsigned long max = info->next_index-1;
unsigned long freed;
if (max < SHMEM_NR_DIRECT) {
info->next_index = index;
return shmem_free_swp(info->i_direct + index,
SHMEM_NR_DIRECT - index);
if (!info->swapped)
return 0;
freed = shmem_free_swp(info->i_direct + index,
SHMEM_NR_DIRECT - index);
info->swapped -= freed;
return freed;
}
if (max < ENTRIES_PER_PAGE * ENTRIES_PER_PAGE/2 + SHMEM_NR_DIRECT) {
......@@ -329,24 +360,21 @@ shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index)
info->next_index = baseidx;
start = 0;
}
return shmem_truncate_direct(base, start, len);
return *base? shmem_truncate_direct(info, base, start, len): 0;
}
static void shmem_truncate (struct inode * inode)
static void shmem_truncate(struct inode *inode)
{
struct shmem_inode_info *info = SHMEM_I(inode);
unsigned long index;
unsigned long freed = 0;
struct shmem_inode_info * info = SHMEM_I(inode);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
spin_lock (&info->lock);
while (index < info->next_index)
freed += shmem_truncate_indirect(info, index);
info->swapped -= freed;
spin_lock(&info->lock);
while (index < info->next_index)
(void) shmem_truncate_indirect(info, index);
shmem_recalc_inode(inode);
spin_unlock (&info->lock);
spin_unlock(&info->lock);
}
static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
......@@ -407,6 +435,7 @@ static void shmem_delete_inode(struct inode * inode)
inode->i_size = 0;
shmem_truncate (inode);
}
BUG_ON(inode->i_blocks);
spin_lock (&sbinfo->stat_lock);
sbinfo->free_inodes++;
spin_unlock (&sbinfo->stat_lock);
......@@ -663,13 +692,12 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **p
return -ENOSPC;
}
sbinfo->free_blocks--;
inode->i_blocks += BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock);
page = page_cache_alloc(mapping);
if (!page) {
spin_lock(&sbinfo->stat_lock);
sbinfo->free_blocks++;
spin_unlock(&sbinfo->stat_lock);
shmem_free_block(inode);
return -ENOMEM;
}
......@@ -681,16 +709,14 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **p
add_to_page_cache_lru(page, mapping, idx) < 0) {
spin_unlock(&info->lock);
page_cache_release(page);
spin_lock(&sbinfo->stat_lock);
sbinfo->free_blocks++;
spin_unlock(&sbinfo->stat_lock);
shmem_free_block(inode);
if (error)
return error;
goto repeat;
}
info->alloced++;
spin_unlock(&info->lock);
clear_highpage(page);
inode->i_blocks += BLOCKS_PER_PAGE;
}
/* We have the page */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment