Commit 14516bb7 authored by Dmitry Monakhov, committed by Theodore Ts'o

ext4: fix suboptimal seek_{data,hole} extents traversal

Scanning an inode block by block is a ridiculous practice; this technique is
applicable only to old indirect-mapped files. It takes a significant amount
of time for really large files. Let's reuse ext4_fiemap, which already
traverses the inode tree in the most optimal manner.

TESTCASE:
ftruncate64(fd, 0);
ftruncate64(fd, 1ULL << 40);
/* lseek will spin for a very long time */
lseek64(fd, 0, SEEK_DATA);
lseek64(fd, 0, SEEK_HOLE);

Original report: https://lkml.org/lkml/2014/10/16/620

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent d952d69e
...@@ -5166,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ...@@ -5166,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
/* fallback to generic here if not in extents fmt */ /* fallback to generic here if not in extents fmt */
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return generic_block_fiemap(inode, fieinfo, start, len, return __generic_block_fiemap(inode, fieinfo, start, len,
ext4_get_block); ext4_get_block);
if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
return -EBADR; return -EBADR;
......
...@@ -273,24 +273,19 @@ static int ext4_file_open(struct inode * inode, struct file * filp) ...@@ -273,24 +273,19 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
* we determine this extent as a data or a hole according to whether the * we determine this extent as a data or a hole according to whether the
* page cache has data or not. * page cache has data or not.
*/ */
static int ext4_find_unwritten_pgoff(struct inode *inode, static int ext4_find_unwritten_pgoff(struct inode *inode, int whence,
int whence, loff_t endoff, loff_t *offset)
struct ext4_map_blocks *map,
loff_t *offset)
{ {
struct pagevec pvec; struct pagevec pvec;
unsigned int blkbits;
pgoff_t index; pgoff_t index;
pgoff_t end; pgoff_t end;
loff_t endoff;
loff_t startoff; loff_t startoff;
loff_t lastoff; loff_t lastoff;
int found = 0; int found = 0;
blkbits = inode->i_sb->s_blocksize_bits;
startoff = *offset; startoff = *offset;
lastoff = startoff; lastoff = startoff;
endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
index = startoff >> PAGE_CACHE_SHIFT; index = startoff >> PAGE_CACHE_SHIFT;
end = endoff >> PAGE_CACHE_SHIFT; end = endoff >> PAGE_CACHE_SHIFT;
...@@ -408,147 +403,144 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, ...@@ -408,147 +403,144 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{ {
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct ext4_map_blocks map; struct fiemap_extent_info fie;
struct extent_status es; struct fiemap_extent ext[2];
ext4_lblk_t start, last, end; loff_t next;
loff_t dataoff, isize; int i, ret = 0;
int blkbits;
int ret = 0;
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
if (offset >= inode->i_size) {
isize = i_size_read(inode);
if (offset >= isize) {
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
return -ENXIO; return -ENXIO;
} }
fie.fi_flags = 0;
blkbits = inode->i_sb->s_blocksize_bits; fie.fi_extents_max = 2;
start = offset >> blkbits; fie.fi_extents_start = (struct fiemap_extent __user *) &ext;
last = start; while (1) {
end = isize >> blkbits; mm_segment_t old_fs = get_fs();
dataoff = offset;
fie.fi_extents_mapped = 0;
do { memset(ext, 0, sizeof(*ext) * fie.fi_extents_max);
map.m_lblk = last;
map.m_len = end - last + 1; set_fs(get_ds());
ret = ext4_map_blocks(NULL, inode, &map, 0); ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { set_fs(old_fs);
if (last != start) if (ret)
dataoff = (loff_t)last << blkbits;
break; break;
}
/* /* No extents found, EOF */
* If there is a delay extent at this offset, if (!fie.fi_extents_mapped) {
* it will be as a data. ret = -ENXIO;
*/
ext4_es_find_delayed_extent_range(inode, last, last, &es);
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
if (last != start)
dataoff = (loff_t)last << blkbits;
break; break;
} }
for (i = 0; i < fie.fi_extents_mapped; i++) {
next = (loff_t)(ext[i].fe_length + ext[i].fe_logical);
/* if (offset < (loff_t)ext[i].fe_logical)
* If there is a unwritten extent at this offset, offset = (loff_t)ext[i].fe_logical;
* it will be as a data or a hole according to page /*
* cache that has data or not. * If extent is not unwritten, then it contains valid
*/ * data, mapped or delayed.
if (map.m_flags & EXT4_MAP_UNWRITTEN) { */
int unwritten; if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN))
unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, goto out;
&map, &dataoff);
if (unwritten)
break;
}
last++; /*
dataoff = (loff_t)last << blkbits; * If there is a unwritten extent at this offset,
} while (last <= end); * it will be as a data or a hole according to page
* cache that has data or not.
*/
if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
next, &offset))
goto out;
if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) {
ret = -ENXIO;
goto out;
}
offset = next;
}
}
if (offset > inode->i_size)
offset = inode->i_size;
out:
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
if (ret)
return ret;
if (dataoff > isize) return vfs_setpos(file, offset, maxsize);
return -ENXIO;
return vfs_setpos(file, dataoff, maxsize);
} }
/* /*
* ext4_seek_hole() retrieves the offset for SEEK_HOLE. * ext4_seek_hole() retrieves the offset for SEEK_HOLE
*/ */
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{ {
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct ext4_map_blocks map; struct fiemap_extent_info fie;
struct extent_status es; struct fiemap_extent ext[2];
ext4_lblk_t start, last, end; loff_t next;
loff_t holeoff, isize; int i, ret = 0;
int blkbits;
int ret = 0;
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
if (offset >= inode->i_size) {
isize = i_size_read(inode);
if (offset >= isize) {
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
return -ENXIO; return -ENXIO;
} }
blkbits = inode->i_sb->s_blocksize_bits; fie.fi_flags = 0;
start = offset >> blkbits; fie.fi_extents_max = 2;
last = start; fie.fi_extents_start = (struct fiemap_extent __user *)&ext;
end = isize >> blkbits; while (1) {
holeoff = offset; mm_segment_t old_fs = get_fs();
do { fie.fi_extents_mapped = 0;
map.m_lblk = last; memset(ext, 0, sizeof(*ext));
map.m_len = end - last + 1;
ret = ext4_map_blocks(NULL, inode, &map, 0);
if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
last += ret;
holeoff = (loff_t)last << blkbits;
continue;
}
/* set_fs(get_ds());
* If there is a delay extent at this offset, ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
* we will skip this extent. set_fs(old_fs);
*/ if (ret)
ext4_es_find_delayed_extent_range(inode, last, last, &es); break;
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
last = es.es_lblk + es.es_len;
holeoff = (loff_t)last << blkbits;
continue;
}
/* /* No extents found */
* If there is a unwritten extent at this offset, if (!fie.fi_extents_mapped)
* it will be as a data or a hole according to page break;
* cache that has data or not.
*/ for (i = 0; i < fie.fi_extents_mapped; i++) {
if (map.m_flags & EXT4_MAP_UNWRITTEN) { next = (loff_t)(ext[i].fe_logical + ext[i].fe_length);
int unwritten; /*
unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, * If extent is not unwritten, then it contains valid
&map, &holeoff); * data, mapped or delayed.
if (!unwritten) { */
last += ret; if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
holeoff = (loff_t)last << blkbits; if (offset < (loff_t)ext[i].fe_logical)
goto out;
offset = next;
continue; continue;
} }
} /*
* If there is a unwritten extent at this offset,
/* find a hole */ * it will be as a data or a hole according to page
break; * cache that has data or not.
} while (last <= end); */
if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
next, &offset))
goto out;
offset = next;
if (ext[i].fe_flags & FIEMAP_EXTENT_LAST)
goto out;
}
}
if (offset > inode->i_size)
offset = inode->i_size;
out:
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
if (ret)
return ret;
if (holeoff > isize) return vfs_setpos(file, offset, maxsize);
holeoff = isize;
return vfs_setpos(file, holeoff, maxsize);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment