Commit c8e0b00e authored by Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  jbd2: call __jbd2_log_start_commit with j_state_lock write locked
  ext4: serialize unaligned asynchronous DIO
  ext4: make grpinfo slab cache names static
  ext4: Fix data corruption with multi-block writepages support
  ext4: fix up ext4 error handling
  ext4: unregister features interface on module unload
  ext4: fix panic on module unload when stopping lazyinit thread
parents 3c6c0d6c e4471831
...@@ -848,6 +848,7 @@ struct ext4_inode_info { ...@@ -848,6 +848,7 @@ struct ext4_inode_info {
atomic_t i_ioend_count; /* Number of outstanding io_end structs */ atomic_t i_ioend_count; /* Number of outstanding io_end structs */
/* current io_end structure for async DIO write*/ /* current io_end structure for async DIO write*/
ext4_io_end_t *cur_aio_dio; ext4_io_end_t *cur_aio_dio;
atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
spinlock_t i_block_reservation_lock; spinlock_t i_block_reservation_lock;
...@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) ...@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
/*
 * Hashed wait queues (io_end completion, AIO unwritten conversion) and
 * hashed AIO mutexes.  The key passed to the accessor macros is cast to
 * unsigned long and reduced modulo EXT4_WQ_HASH_SZ to select a slot, so
 * different keys may map to the same entry.
 */
#define EXT4_WQ_HASH_SZ		37

extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];

#define ext4_ioend_wq(v) \
	(&ext4__ioend_wq[((unsigned long)(v)) % EXT4_WQ_HASH_SZ])
#define ext4_aio_mutex(v) \
	(&ext4__aio_mutex[((unsigned long)(v)) % EXT4_WQ_HASH_SZ])
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _EXT4_H */ #endif /* _EXT4_H */
...@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ...@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
* that this IO needs to convertion to written when IO is * that this IO needs to convertion to written when IO is
* completed * completed
*/ */
if (io) if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
io->flag = EXT4_IO_END_UNWRITTEN; io->flag = EXT4_IO_END_UNWRITTEN;
else atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
} else
ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
if (ext4_should_dioread_nolock(inode)) if (ext4_should_dioread_nolock(inode))
map->m_flags |= EXT4_MAP_UNINIT; map->m_flags |= EXT4_MAP_UNINIT;
...@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ...@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* that we need to perform convertion when IO is done. * that we need to perform convertion when IO is done.
*/ */
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
if (io) if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
io->flag = EXT4_IO_END_UNWRITTEN; io->flag = EXT4_IO_END_UNWRITTEN;
else atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
} else
ext4_set_inode_state(inode, ext4_set_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN); EXT4_STATE_DIO_UNWRITTEN);
} }
......
...@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp) ...@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
return 0; return 0;
} }
static void ext4_aiodio_wait(struct inode *inode)
{
wait_queue_head_t *wq = ext4_ioend_wq(inode);
wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
}
/*
 * Report whether a write is unaligned with respect to the filesystem
 * block size.
 *
 * Ext4 fills holes under direct IO with unwritten extents, which are only
 * converted to written after the IO has completed.  Until that happens the
 * blocks still look like holes, so dio_zero_block() assumes it has to zero
 * the unused head and/or tail of the block.  Two AIO threads working on the
 * same unwritten block must therefore be synchronized, otherwise one of
 * them zeroes the other's data and corrupts the file.
 *
 * Returns 1 when the write starts below i_size and its start or end offset
 * is not a multiple of the block size; returns 0 otherwise.
 */
static int
ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
		   unsigned long nr_segs, loff_t pos)
{
	int mask = inode->i_sb->s_blocksize - 1;
	loff_t end = pos + iov_length(iov, nr_segs);

	/* A write that starts at or beyond i_size is never flagged. */
	if (pos >= inode->i_size)
		return 0;

	/* Any low bit set in either offset means a partial block. */
	return ((pos | end) & mask) ? 1 : 0;
}
static ssize_t static ssize_t
ext4_file_write(struct kiocb *iocb, const struct iovec *iov, ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos) unsigned long nr_segs, loff_t pos)
{ {
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
int unaligned_aio = 0;
int ret;
/* /*
* If we have encountered a bitmap-format file, the size limit * If we have encountered a bitmap-format file, the size limit
...@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
nr_segs = iov_shorten((struct iovec *)iov, nr_segs, nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
sbi->s_bitmap_maxbytes - pos); sbi->s_bitmap_maxbytes - pos);
} }
} else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
!is_sync_kiocb(iocb))) {
unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
} }
return generic_file_aio_write(iocb, iov, nr_segs, pos); /* Unaligned direct AIO must be serialized; see comment above */
if (unaligned_aio) {
static unsigned long unaligned_warn_time;
/* Warn about this once per day */
if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
ext4_msg(inode->i_sb, KERN_WARNING,
"Unaligned AIO/DIO on inode %ld by %s; "
"performance will be poor.",
inode->i_ino, current->comm);
mutex_lock(ext4_aio_mutex(inode));
ext4_aiodio_wait(inode);
}
ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
if (unaligned_aio)
mutex_unlock(ext4_aio_mutex(inode));
return ret;
} }
static const struct vm_operations_struct ext4_file_vm_ops = { static const struct vm_operations_struct ext4_file_vm_ops = {
......
...@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep; ...@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep;
/* We create slab caches for groupinfo data structures based on the /* We create slab caches for groupinfo data structures based on the
* superblock block size. There will be one per mounted filesystem for * superblock block size. There will be one per mounted filesystem for
* each unique s_blocksize_bits */ * each unique s_blocksize_bits */
#define NR_GRPINFO_CACHES \ #define NR_GRPINFO_CACHES 8
(EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
/*
 * Static names for the groupinfo slab caches, one per slot of
 * ext4_groupinfo_caches[].  The table is read-only, so the pointer array
 * is const as well as the strings it points to.
 */
static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group); ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
...@@ -2414,6 +2419,55 @@ static int ext4_mb_init_backend(struct super_block *sb) ...@@ -2414,6 +2419,55 @@ static int ext4_mb_init_backend(struct super_block *sb)
return -ENOMEM; return -ENOMEM;
} }
/*
 * Destroy every groupinfo slab cache that has been created and reset
 * each slot of ext4_groupinfo_caches[] to NULL.
 */
static void ext4_groupinfo_destroy_slabs(void)
{
	struct kmem_cache **slot = ext4_groupinfo_caches;
	struct kmem_cache **end = slot + NR_GRPINFO_CACHES;

	for (; slot < end; slot++) {
		if (*slot)
			kmem_cache_destroy(*slot);
		*slot = NULL;
	}
}
/*
 * Create the groupinfo slab cache for a given block size if it does not
 * exist yet.
 *
 * @size: block size in bytes; order_base_2(size) selects the cache slot
 *        and the number of bb_counters entries in each object.
 *
 * Returns 0 if the cache already exists or was created, -EINVAL if the
 * block size maps past the last slot, and -ENOMEM if the slab cache could
 * not be created.
 */
static int ext4_groupinfo_create_slab(size_t size)
{
	static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
	int slab_size;
	int blocksize_bits = order_base_2(size);
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep;

	if (cache_index >= NR_GRPINFO_CACHES)
		return -EINVAL;

	/* Sizes below the minimum block size fall back to the first slot. */
	if (unlikely(cache_index < 0))
		cache_index = 0;

	mutex_lock(&ext4_grpinfo_slab_create_mutex);
	if (ext4_groupinfo_caches[cache_index]) {
		mutex_unlock(&ext4_grpinfo_slab_create_mutex);
		return 0;	/* Already created */
	}

	slab_size = offsetof(struct ext4_group_info,
				bb_counters[blocksize_bits + 2]);

	cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
					NULL);

	/*
	 * Publish the cache before dropping the mutex.  Otherwise a second
	 * caller could take the mutex, still see an empty slot, and create a
	 * duplicate cache with the same name.  On failure this stores NULL,
	 * which leaves the slot as it was.
	 */
	ext4_groupinfo_caches[cache_index] = cachep;

	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
	if (!cachep) {
		printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
		return -ENOMEM;
	}

	return 0;
}
int ext4_mb_init(struct super_block *sb, int needs_recovery) int ext4_mb_init(struct super_block *sb, int needs_recovery)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
...@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) ...@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
unsigned offset; unsigned offset;
unsigned max; unsigned max;
int ret; int ret;
int cache_index;
struct kmem_cache *cachep;
char *namep = NULL;
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
...@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) ...@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
goto out; goto out;
} }
cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; ret = ext4_groupinfo_create_slab(sb->s_blocksize);
cachep = ext4_groupinfo_caches[cache_index]; if (ret < 0)
if (!cachep) { goto out;
char name[32];
int len = offsetof(struct ext4_group_info,
bb_counters[sb->s_blocksize_bits + 2]);
sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
namep = kstrdup(name, GFP_KERNEL);
if (!namep) {
ret = -ENOMEM;
goto out;
}
/* Need to free the kmem_cache_name() when we
* destroy the slab */
cachep = kmem_cache_create(namep, len, 0,
SLAB_RECLAIM_ACCOUNT, NULL);
if (!cachep) {
ret = -ENOMEM;
goto out;
}
ext4_groupinfo_caches[cache_index] = cachep;
}
/* order 0 is regular bitmap */ /* order 0 is regular bitmap */
sbi->s_mb_maxs[0] = sb->s_blocksize << 3; sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
...@@ -2520,7 +2550,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) ...@@ -2520,7 +2550,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
if (ret) { if (ret) {
kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs); kfree(sbi->s_mb_maxs);
kfree(namep);
} }
return ret; return ret;
} }
...@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void) ...@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void)
void ext4_exit_mballoc(void) void ext4_exit_mballoc(void)
{ {
int i;
/* /*
* Wait for completion of call_rcu()'s on ext4_pspace_cachep * Wait for completion of call_rcu()'s on ext4_pspace_cachep
* before destroying the slab cache. * before destroying the slab cache.
...@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void) ...@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void)
kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep); kmem_cache_destroy(ext4_ac_cachep);
kmem_cache_destroy(ext4_free_ext_cachep); kmem_cache_destroy(ext4_free_ext_cachep);
ext4_groupinfo_destroy_slabs();
for (i = 0; i < NR_GRPINFO_CACHES; i++) {
struct kmem_cache *cachep = ext4_groupinfo_caches[i];
if (cachep) {
char *name = (char *)kmem_cache_name(cachep);
kmem_cache_destroy(cachep);
kfree(name);
}
}
ext4_remove_debugfs_entry(); ext4_remove_debugfs_entry();
} }
......
...@@ -32,14 +32,8 @@ ...@@ -32,14 +32,8 @@
static struct kmem_cache *io_page_cachep, *io_end_cachep; static struct kmem_cache *io_page_cachep, *io_end_cachep;
#define WQ_HASH_SZ 37
#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
int __init ext4_init_pageio(void) int __init ext4_init_pageio(void)
{ {
int i;
io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
if (io_page_cachep == NULL) if (io_page_cachep == NULL)
return -ENOMEM; return -ENOMEM;
...@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void) ...@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void)
kmem_cache_destroy(io_page_cachep); kmem_cache_destroy(io_page_cachep);
return -ENOMEM; return -ENOMEM;
} }
for (i = 0; i < WQ_HASH_SZ; i++)
init_waitqueue_head(&ioend_wq[i]);
return 0; return 0;
} }
...@@ -62,7 +53,7 @@ void ext4_exit_pageio(void) ...@@ -62,7 +53,7 @@ void ext4_exit_pageio(void)
void ext4_ioend_wait(struct inode *inode) void ext4_ioend_wait(struct inode *inode)
{ {
wait_queue_head_t *wq = to_ioend_wq(inode); wait_queue_head_t *wq = ext4_ioend_wq(inode);
wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
} }
...@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io) ...@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
for (i = 0; i < io->num_io_pages; i++) for (i = 0; i < io->num_io_pages; i++)
put_io_page(io->pages[i]); put_io_page(io->pages[i]);
io->num_io_pages = 0; io->num_io_pages = 0;
wq = to_ioend_wq(io->inode); wq = ext4_ioend_wq(io->inode);
if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
waitqueue_active(wq)) waitqueue_active(wq))
wake_up_all(wq); wake_up_all(wq);
...@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io) ...@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
struct inode *inode = io->inode; struct inode *inode = io->inode;
loff_t offset = io->offset; loff_t offset = io->offset;
ssize_t size = io->size; ssize_t size = io->size;
wait_queue_head_t *wq;
int ret = 0; int ret = 0;
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
...@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io) ...@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
if (io->iocb) if (io->iocb)
aio_complete(io->iocb, io->result, 0); aio_complete(io->iocb, io->result, 0);
/* clear the DIO AIO unwritten flag */ /* clear the DIO AIO unwritten flag */
io->flag &= ~EXT4_IO_END_UNWRITTEN; if (io->flag & EXT4_IO_END_UNWRITTEN) {
io->flag &= ~EXT4_IO_END_UNWRITTEN;
/* Wake up anyone waiting on unwritten extent conversion */
wq = ext4_ioend_wq(io->inode);
if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
waitqueue_active(wq)) {
wake_up_all(wq);
}
}
return ret; return ret;
} }
...@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error) ...@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error)
struct inode *inode; struct inode *inode;
unsigned long flags; unsigned long flags;
int i; int i;
sector_t bi_sector = bio->bi_sector;
BUG_ON(!io_end); BUG_ON(!io_end);
bio->bi_private = NULL; bio->bi_private = NULL;
...@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error) ...@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error)
if (error) if (error)
SetPageError(page); SetPageError(page);
BUG_ON(!head); BUG_ON(!head);
if (head->b_size == PAGE_CACHE_SIZE) if (head->b_size != PAGE_CACHE_SIZE) {
clear_buffer_dirty(head);
else {
loff_t offset; loff_t offset;
loff_t io_end_offset = io_end->offset + io_end->size; loff_t io_end_offset = io_end->offset + io_end->size;
...@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error) ...@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error)
if (error) if (error)
buffer_io_error(bh); buffer_io_error(bh);
clear_buffer_dirty(bh);
} }
if (buffer_delay(bh)) if (buffer_delay(bh))
partial_write = 1; partial_write = 1;
...@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error) ...@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error)
(unsigned long long) io_end->offset, (unsigned long long) io_end->offset,
(long) io_end->size, (long) io_end->size,
(unsigned long long) (unsigned long long)
bio->bi_sector >> (inode->i_blkbits - 9)); bi_sector >> (inode->i_blkbits - 9));
} }
/* Add the io_end to per-inode completed io list*/ /* Add the io_end to per-inode completed io list*/
...@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, ...@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
blocksize = 1 << inode->i_blkbits; blocksize = 1 << inode->i_blkbits;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page)); BUG_ON(PageWriteback(page));
set_page_writeback(page); set_page_writeback(page);
ClearPageError(page); ClearPageError(page);
...@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io, ...@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
for (bh = head = page_buffers(page), block_start = 0; for (bh = head = page_buffers(page), block_start = 0;
bh != head || !block_start; bh != head || !block_start;
block_start = block_end, bh = bh->b_this_page) { block_start = block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize; block_end = block_start + blocksize;
if (block_start >= len) { if (block_start >= len) {
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
continue; continue;
} }
clear_buffer_dirty(bh);
ret = io_submit_add_bh(io, io_page, inode, wbc, bh); ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
if (ret) { if (ret) {
/* /*
......
...@@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, ...@@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data); const char *dev_name, void *data);
static void ext4_destroy_lazyinit_thread(void); static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb); static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = { static struct file_system_type ext3_fs_type = {
...@@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ...@@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->i_sync_tid = 0; ei->i_sync_tid = 0;
ei->i_datasync_tid = 0; ei->i_datasync_tid = 0;
atomic_set(&ei->i_ioend_count, 0); atomic_set(&ei->i_ioend_count, 0);
atomic_set(&ei->i_aiodio_unwritten, 0);
return &ei->vfs_inode; return &ei->vfs_inode;
} }
...@@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb) ...@@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb)
mutex_unlock(&ext4_li_info->li_list_mtx); mutex_unlock(&ext4_li_info->li_list_mtx);
} }
static struct task_struct *ext4_lazyinit_task;
/* /*
* This is the function where ext4lazyinit thread lives. It walks * This is the function where ext4lazyinit thread lives. It walks
* through the request list searching for next scheduled filesystem. * through the request list searching for next scheduled filesystem.
...@@ -2784,6 +2788,10 @@ static int ext4_lazyinit_thread(void *arg) ...@@ -2784,6 +2788,10 @@ static int ext4_lazyinit_thread(void *arg)
if (time_before(jiffies, next_wakeup)) if (time_before(jiffies, next_wakeup))
schedule(); schedule();
finish_wait(&eli->li_wait_daemon, &wait); finish_wait(&eli->li_wait_daemon, &wait);
if (kthread_should_stop()) {
ext4_clear_request_list();
goto exit_thread;
}
} }
exit_thread: exit_thread:
...@@ -2808,6 +2816,7 @@ static int ext4_lazyinit_thread(void *arg) ...@@ -2808,6 +2816,7 @@ static int ext4_lazyinit_thread(void *arg)
wake_up(&eli->li_wait_task); wake_up(&eli->li_wait_task);
kfree(ext4_li_info); kfree(ext4_li_info);
ext4_lazyinit_task = NULL;
ext4_li_info = NULL; ext4_li_info = NULL;
mutex_unlock(&ext4_li_mtx); mutex_unlock(&ext4_li_mtx);
...@@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void) ...@@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void)
static int ext4_run_lazyinit_thread(void) static int ext4_run_lazyinit_thread(void)
{ {
struct task_struct *t; ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
ext4_li_info, "ext4lazyinit");
t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); if (IS_ERR(ext4_lazyinit_task)) {
if (IS_ERR(t)) { int err = PTR_ERR(ext4_lazyinit_task);
int err = PTR_ERR(t);
ext4_clear_request_list(); ext4_clear_request_list();
del_timer_sync(&ext4_li_info->li_timer); del_timer_sync(&ext4_li_info->li_timer);
kfree(ext4_li_info); kfree(ext4_li_info);
...@@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void) ...@@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void)
* If thread exited earlier * If thread exited earlier
* there's nothing to be done. * there's nothing to be done.
*/ */
if (!ext4_li_info) if (!ext4_li_info || !ext4_lazyinit_task)
return; return;
ext4_clear_request_list(); kthread_stop(ext4_lazyinit_task);
while (ext4_li_info->li_task) {
wake_up(&ext4_li_info->li_wait_daemon);
wait_event(ext4_li_info->li_wait_task,
ext4_li_info->li_task == NULL);
}
} }
static int ext4_fill_super(struct super_block *sb, void *data, int silent) static int ext4_fill_super(struct super_block *sb, void *data, int silent)
...@@ -4768,7 +4770,7 @@ static struct file_system_type ext4_fs_type = { ...@@ -4768,7 +4770,7 @@ static struct file_system_type ext4_fs_type = {
.fs_flags = FS_REQUIRES_DEV, .fs_flags = FS_REQUIRES_DEV,
}; };
int __init ext4_init_feat_adverts(void) static int __init ext4_init_feat_adverts(void)
{ {
struct ext4_features *ef; struct ext4_features *ef;
int ret = -ENOMEM; int ret = -ENOMEM;
...@@ -4792,23 +4794,44 @@ int __init ext4_init_feat_adverts(void) ...@@ -4792,23 +4794,44 @@ int __init ext4_init_feat_adverts(void)
return ret; return ret;
} }
static void ext4_exit_feat_adverts(void)
{
kobject_put(&ext4_feat->f_kobj);
wait_for_completion(&ext4_feat->f_kobj_unregister);
kfree(ext4_feat);
}
/*
 * Hashed AIO mutexes and io_end wait queues.  There is a single set of
 * each, shared across all ext4 file systems.
 */
struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
static int __init ext4_init_fs(void) static int __init ext4_init_fs(void)
{ {
int err; int i, err;
ext4_check_flag_values(); ext4_check_flag_values();
for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
mutex_init(&ext4__aio_mutex[i]);
init_waitqueue_head(&ext4__ioend_wq[i]);
}
err = ext4_init_pageio(); err = ext4_init_pageio();
if (err) if (err)
return err; return err;
err = ext4_init_system_zone(); err = ext4_init_system_zone();
if (err) if (err)
goto out5; goto out7;
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
if (!ext4_kset) if (!ext4_kset)
goto out4; goto out6;
ext4_proc_root = proc_mkdir("fs/ext4", NULL); ext4_proc_root = proc_mkdir("fs/ext4", NULL);
if (!ext4_proc_root)
goto out5;
err = ext4_init_feat_adverts(); err = ext4_init_feat_adverts();
if (err)
goto out4;
err = ext4_init_mballoc(); err = ext4_init_mballoc();
if (err) if (err)
...@@ -4838,12 +4861,14 @@ static int __init ext4_init_fs(void) ...@@ -4838,12 +4861,14 @@ static int __init ext4_init_fs(void)
out2: out2:
ext4_exit_mballoc(); ext4_exit_mballoc();
out3: out3:
kfree(ext4_feat); ext4_exit_feat_adverts();
out4:
remove_proc_entry("fs/ext4", NULL); remove_proc_entry("fs/ext4", NULL);
out5:
kset_unregister(ext4_kset); kset_unregister(ext4_kset);
out4: out6:
ext4_exit_system_zone(); ext4_exit_system_zone();
out5: out7:
ext4_exit_pageio(); ext4_exit_pageio();
return err; return err;
} }
...@@ -4857,6 +4882,7 @@ static void __exit ext4_exit_fs(void) ...@@ -4857,6 +4882,7 @@ static void __exit ext4_exit_fs(void)
destroy_inodecache(); destroy_inodecache();
ext4_exit_xattr(); ext4_exit_xattr();
ext4_exit_mballoc(); ext4_exit_mballoc();
ext4_exit_feat_adverts();
remove_proc_entry("fs/ext4", NULL); remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset); kset_unregister(ext4_kset);
ext4_exit_system_zone(); ext4_exit_system_zone();
......
...@@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal) ...@@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal)
} }
/* /*
* Called under j_state_lock. Returns true if a transaction commit was started. * Called with j_state_lock locked for writing.
* Returns true if a transaction commit was started.
*/ */
int __jbd2_log_start_commit(journal_t *journal, tid_t target) int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{ {
...@@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal) ...@@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
{ {
transaction_t *transaction = NULL; transaction_t *transaction = NULL;
tid_t tid; tid_t tid;
int need_to_start = 0;
read_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
if (journal->j_running_transaction && !current->journal_info) { if (journal->j_running_transaction && !current->journal_info) {
transaction = journal->j_running_transaction; transaction = journal->j_running_transaction;
__jbd2_log_start_commit(journal, transaction->t_tid); if (!tid_geq(journal->j_commit_request, transaction->t_tid))
need_to_start = 1;
} else if (journal->j_committing_transaction) } else if (journal->j_committing_transaction)
transaction = journal->j_committing_transaction; transaction = journal->j_committing_transaction;
...@@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal) ...@@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
tid = transaction->t_tid; tid = transaction->t_tid;
read_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
if (need_to_start)
jbd2_log_start_commit(journal, tid);
jbd2_log_wait_commit(journal, tid); jbd2_log_wait_commit(journal, tid);
return 1; return 1;
} }
......
...@@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction) ...@@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction)
static int start_this_handle(journal_t *journal, handle_t *handle, static int start_this_handle(journal_t *journal, handle_t *handle,
int gfp_mask) int gfp_mask)
{ {
transaction_t *transaction; transaction_t *transaction, *new_transaction = NULL;
int needed; tid_t tid;
int nblocks = handle->h_buffer_credits; int needed, need_to_start;
transaction_t *new_transaction = NULL; int nblocks = handle->h_buffer_credits;
if (nblocks > journal->j_max_transaction_buffers) { if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
...@@ -222,8 +222,11 @@ static int start_this_handle(journal_t *journal, handle_t *handle, ...@@ -222,8 +222,11 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
atomic_sub(nblocks, &transaction->t_outstanding_credits); atomic_sub(nblocks, &transaction->t_outstanding_credits);
prepare_to_wait(&journal->j_wait_transaction_locked, &wait, prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
__jbd2_log_start_commit(journal, transaction->t_tid); tid = transaction->t_tid;
need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
if (need_to_start)
jbd2_log_start_commit(journal, tid);
schedule(); schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait); finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat; goto repeat;
...@@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) ...@@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
{ {
transaction_t *transaction = handle->h_transaction; transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal; journal_t *journal = transaction->t_journal;
int ret; tid_t tid;
int need_to_start, ret;
/* If we've had an abort of any type, don't even think about /* If we've had an abort of any type, don't even think about
* actually doing the restart! */ * actually doing the restart! */
...@@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) ...@@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
jbd_debug(2, "restarting handle %p\n", handle); jbd_debug(2, "restarting handle %p\n", handle);
__jbd2_log_start_commit(journal, transaction->t_tid); tid = transaction->t_tid;
need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
if (need_to_start)
jbd2_log_start_commit(journal, tid);
lock_map_release(&handle->h_lockdep_map); lock_map_release(&handle->h_lockdep_map);
handle->h_buffer_credits = nblocks; handle->h_buffer_credits = nblocks;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment