Commit 391f2a16 authored by Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Some locking and page fault bug fixes from Jan Kara, some ext4
  encryption fixes from me, and Li Xi's Project Quota commits"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  fs: clean up the flags definition in uapi/linux/fs.h
  ext4: add FS_IOC_FSSETXATTR/FS_IOC_FSGETXATTR interface support
  ext4: add project quota support
  ext4: adds project ID support
  ext4 crypto: simplify interfaces to directory entry insert functions
  ext4 crypto: add missing locking for keyring_key access
  ext4: use pre-zeroed blocks for DAX page faults
  ext4: implement allocation of pre-zeroed blocks
  ext4: provide ext4_issue_zeroout()
  ext4: get rid of EXT4_GET_BLOCKS_NO_LOCK flag
  ext4: document lock ordering
  ext4: fix races of writeback with punch hole and zero range
  ext4: fix races between buffered IO and collapse / insert range
  ext4: move unlocked dio protection from ext4_alloc_file_blocks()
  ext4: fix races between page faults and hole punching
parents d5ffdf8b 68ce7bfc
...@@ -384,14 +384,12 @@ int ext4_decrypt(struct page *page) ...@@ -384,14 +384,12 @@ int ext4_decrypt(struct page *page)
EXT4_DECRYPT, page->index, page, page); EXT4_DECRYPT, page->index, page, page);
} }
int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len)
{ {
struct ext4_crypto_ctx *ctx; struct ext4_crypto_ctx *ctx;
struct page *ciphertext_page = NULL; struct page *ciphertext_page = NULL;
struct bio *bio; struct bio *bio;
ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
ext4_fsblk_t pblk = ext4_ext_pblock(ex);
unsigned int len = ext4_ext_get_actual_len(ex);
int ret, err = 0; int ret, err = 0;
#if 0 #if 0
......
...@@ -213,9 +213,11 @@ int _ext4_get_encryption_info(struct inode *inode) ...@@ -213,9 +213,11 @@ int _ext4_get_encryption_info(struct inode *inode)
res = -ENOKEY; res = -ENOKEY;
goto out; goto out;
} }
down_read(&keyring_key->sem);
ukp = user_key_payload(keyring_key); ukp = user_key_payload(keyring_key);
if (ukp->datalen != sizeof(struct ext4_encryption_key)) { if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
res = -EINVAL; res = -EINVAL;
up_read(&keyring_key->sem);
goto out; goto out;
} }
master_key = (struct ext4_encryption_key *)ukp->data; master_key = (struct ext4_encryption_key *)ukp->data;
...@@ -226,10 +228,12 @@ int _ext4_get_encryption_info(struct inode *inode) ...@@ -226,10 +228,12 @@ int _ext4_get_encryption_info(struct inode *inode)
"ext4: key size incorrect: %d\n", "ext4: key size incorrect: %d\n",
master_key->size); master_key->size);
res = -ENOKEY; res = -ENOKEY;
up_read(&keyring_key->sem);
goto out; goto out;
} }
res = ext4_derive_key_aes(ctx.nonce, master_key->raw, res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
raw_key); raw_key);
up_read(&keyring_key->sem);
if (res) if (res)
goto out; goto out;
got_key: got_key:
......
...@@ -378,14 +378,22 @@ struct flex_groups { ...@@ -378,14 +378,22 @@ struct flex_groups {
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ #define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */ #define EXT4_FL_USER_MODIFIABLE 0x204380FF /* User modifiable flags */
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
EXT4_IMMUTABLE_FL | \
EXT4_APPEND_FL | \
EXT4_NODUMP_FL | \
EXT4_NOATIME_FL | \
EXT4_PROJINHERIT_FL)
/* Flags that should be inherited by new inodes from their parent. */ /* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
EXT4_PROJINHERIT_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */ /* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL)) #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
...@@ -555,10 +563,12 @@ enum { ...@@ -555,10 +563,12 @@ enum {
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
/* Request will not result in inode size update (used for fallocate) */ /* Request will not result in inode size update (used for fallocate) */
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
/* Do not take i_data_sem locking in ext4_map_blocks */
#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
/* Convert written extents to unwritten */ /* Convert written extents to unwritten */
#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200 #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100
/* Write zeros to newly created written extents */
#define EXT4_GET_BLOCKS_ZERO 0x0200
#define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\
EXT4_GET_BLOCKS_ZERO)
/* /*
* The bit position of these flags must not overlap with any of the * The bit position of these flags must not overlap with any of the
...@@ -616,6 +626,46 @@ enum { ...@@ -616,6 +626,46 @@ enum {
#define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) #define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
#define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy) #define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
#ifndef FS_IOC_FSGETXATTR
/* Until the uapi changes get merged for project quota... */
#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
/*
* Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
*/
struct fsxattr {
__u32 fsx_xflags; /* xflags field value (get/set) */
__u32 fsx_extsize; /* extsize field value (get/set)*/
__u32 fsx_nextents; /* nextents field value (get) */
__u32 fsx_projid; /* project identifier (get/set) */
unsigned char fsx_pad[12];
};
/*
* Flags for the fsx_xflags field
*/
#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
#endif /* !defined(FS_IOC_FSGETXATTR) */
#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
#if defined(__KERNEL__) && defined(CONFIG_COMPAT) #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* /*
* ioctl commands in 32 bit emulation * ioctl commands in 32 bit emulation
...@@ -910,6 +960,15 @@ struct ext4_inode_info { ...@@ -910,6 +960,15 @@ struct ext4_inode_info {
* by other means, so we have i_data_sem. * by other means, so we have i_data_sem.
*/ */
struct rw_semaphore i_data_sem; struct rw_semaphore i_data_sem;
/*
* i_mmap_sem is for serializing page faults with truncate / punch hole
* operations. We have to make sure that new page cannot be faulted in
* a section of the inode that is being punched. We cannot easily use
* i_data_sem for this since we need protection for the whole punch
* operation and i_data_sem ranks below transaction start so we have
* to occasionally drop it.
*/
struct rw_semaphore i_mmap_sem;
struct inode vfs_inode; struct inode vfs_inode;
struct jbd2_inode *jinode; struct jbd2_inode *jinode;
...@@ -993,6 +1052,7 @@ struct ext4_inode_info { ...@@ -993,6 +1052,7 @@ struct ext4_inode_info {
/* Encryption params */ /* Encryption params */
struct ext4_crypt_info *i_crypt_info; struct ext4_crypt_info *i_crypt_info;
#endif #endif
kprojid_t i_projid;
}; };
/* /*
...@@ -1248,7 +1308,7 @@ struct ext4_super_block { ...@@ -1248,7 +1308,7 @@ struct ext4_super_block {
#endif #endif
/* Number of quota types we support */ /* Number of quota types we support */
#define EXT4_MAXQUOTAS 2 #define EXT4_MAXQUOTAS 3
/* /*
* fourth extended-fs super-block data in memory * fourth extended-fs super-block data in memory
...@@ -1754,7 +1814,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT) ...@@ -1754,7 +1814,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
EXT4_FEATURE_RO_COMPAT_QUOTA) EXT4_FEATURE_RO_COMPAT_QUOTA |\
EXT4_FEATURE_RO_COMPAT_PROJECT)
#define EXTN_FEATURE_FUNCS(ver) \ #define EXTN_FEATURE_FUNCS(ver) \
static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \ static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
...@@ -1796,6 +1857,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb) ...@@ -1796,6 +1857,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
#define EXT4_DEF_RESUID 0 #define EXT4_DEF_RESUID 0
#define EXT4_DEF_RESGID 0 #define EXT4_DEF_RESGID 0
/*
* Default project ID
*/
#define EXT4_DEF_PROJID 0
#define EXT4_DEF_INODE_READAHEAD_BLKS 32 #define EXT4_DEF_INODE_READAHEAD_BLKS 32
/* /*
...@@ -2234,7 +2300,8 @@ void ext4_restore_control_page(struct page *data_page); ...@@ -2234,7 +2300,8 @@ void ext4_restore_control_page(struct page *data_page);
struct page *ext4_encrypt(struct inode *inode, struct page *ext4_encrypt(struct inode *inode,
struct page *plaintext_page); struct page *plaintext_page);
int ext4_decrypt(struct page *page); int ext4_decrypt(struct page *page);
int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex); int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len);
#ifdef CONFIG_EXT4_FS_ENCRYPTION #ifdef CONFIG_EXT4_FS_ENCRYPTION
int ext4_init_crypto(void); int ext4_init_crypto(void);
...@@ -2440,8 +2507,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); ...@@ -2440,8 +2507,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_write(struct inode *inode, sector_t iblock, int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create); struct buffer_head *bh_result, int create);
int ext4_get_block_dax(struct inode *inode, sector_t iblock, int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create); struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock, int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create); struct buffer_head *bh_result, int create);
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
...@@ -2484,9 +2551,13 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); ...@@ -2484,9 +2551,13 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t lend); loff_t lstart, loff_t lend);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_update_reserve_space(struct inode *inode, extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim); int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len);
/* indirect.c */ /* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
...@@ -2848,6 +2919,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize) ...@@ -2848,6 +2919,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
return changed; return changed;
} }
int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
loff_t len);
struct ext4_group_info { struct ext4_group_info {
unsigned long bb_state; unsigned long bb_state;
struct rb_root bb_free_root; struct rb_root bb_free_root;
...@@ -2986,8 +3060,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, ...@@ -2986,8 +3060,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
struct page *page); struct page *page);
extern int ext4_try_add_inline_entry(handle_t *handle, extern int ext4_try_add_inline_entry(handle_t *handle,
struct ext4_filename *fname, struct ext4_filename *fname,
struct dentry *dentry, struct inode *dir, struct inode *inode);
struct inode *inode);
extern int ext4_try_create_inline_dir(handle_t *handle, extern int ext4_try_create_inline_dir(handle_t *handle,
struct inode *parent, struct inode *parent,
struct inode *inode); struct inode *inode);
......
...@@ -3119,19 +3119,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) ...@@ -3119,19 +3119,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{ {
ext4_fsblk_t ee_pblock; ext4_fsblk_t ee_pblock;
unsigned int ee_len; unsigned int ee_len;
int ret;
ee_len = ext4_ext_get_actual_len(ex); ee_len = ext4_ext_get_actual_len(ex);
ee_pblock = ext4_ext_pblock(ex); ee_pblock = ext4_ext_pblock(ex);
return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
if (ext4_encrypted_inode(inode)) ee_len);
return ext4_encrypted_zeroout(inode, ex);
ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
if (ret > 0)
ret = 0;
return ret;
} }
/* /*
...@@ -4052,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, ...@@ -4052,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
} }
/* IO end_io complete, convert the filled extent to written */ /* IO end_io complete, convert the filled extent to written */
if (flags & EXT4_GET_BLOCKS_CONVERT) { if (flags & EXT4_GET_BLOCKS_CONVERT) {
if (flags & EXT4_GET_BLOCKS_ZERO) {
if (allocated > map->m_len)
allocated = map->m_len;
err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
allocated);
if (err < 0)
goto out2;
}
ret = ext4_convert_unwritten_extents_endio(handle, inode, map, ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
ppath); ppath);
if (ret >= 0) { if (ret >= 0) {
...@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, ...@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
if (len <= EXT_UNWRITTEN_MAX_LEN) if (len <= EXT_UNWRITTEN_MAX_LEN)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
/* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
/* /*
* credits to insert 1 extent into extent tree * credits to insert 1 extent into extent tree
*/ */
...@@ -4752,8 +4748,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, ...@@ -4752,8 +4748,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
goto retry; goto retry;
} }
ext4_inode_resume_unlocked_dio(inode);
return ret > 0 ? ret2 : ret; return ret > 0 ? ret2 : ret;
} }
...@@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
int partial_begin, partial_end; int partial_begin, partial_end;
loff_t start, end; loff_t start, end;
ext4_lblk_t lblk; ext4_lblk_t lblk;
struct address_space *mapping = inode->i_mapping;
unsigned int blkbits = inode->i_blkbits; unsigned int blkbits = inode->i_blkbits;
trace_ext4_zero_range(inode, offset, len, mode); trace_ext4_zero_range(inode, offset, len, mode);
...@@ -4785,17 +4778,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4785,17 +4778,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
return ret; return ret;
} }
/*
* Write out all dirty pages to avoid race conditions
* Then release them.
*/
if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
ret = filemap_write_and_wait_range(mapping, offset,
offset + len - 1);
if (ret)
return ret;
}
/* /*
* Round up offset. This is not fallocate, we need to zero out * Round up offset. This is not fallocate, we need to zero out
* blocks, so convert interior block aligned part of the range to * blocks, so convert interior block aligned part of the range to
...@@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (mode & FALLOC_FL_KEEP_SIZE) if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE; flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
/* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
/* Preallocate the range including the unaligned edges */ /* Preallocate the range including the unaligned edges */
if (partial_begin || partial_end) { if (partial_begin || partial_end) {
ret = ext4_alloc_file_blocks(file, ret = ext4_alloc_file_blocks(file,
...@@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
round_down(offset, 1 << blkbits)) >> blkbits, round_down(offset, 1 << blkbits)) >> blkbits,
new_size, flags, mode); new_size, flags, mode);
if (ret) if (ret)
goto out_mutex; goto out_dio;
} }
...@@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset,
flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
EXT4_EX_NOCACHE); EXT4_EX_NOCACHE);
/* Now release the pages and zero block aligned part of pages*/ /*
* Prevent page faults from reinstantiating pages we have
* released from page cache.
*/
down_write(&EXT4_I(inode)->i_mmap_sem);
ret = ext4_update_disksize_before_punch(inode, offset, len);
if (ret) {
up_write(&EXT4_I(inode)->i_mmap_sem);
goto out_dio;
}
/* Now release the pages and zero block aligned part of pages */
truncate_pagecache_range(inode, start, end - 1); truncate_pagecache_range(inode, start, end - 1);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode); inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
/* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode); flags, mode);
up_write(&EXT4_I(inode)->i_mmap_sem);
if (ret) if (ret)
goto out_dio; goto out_dio;
} }
...@@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ...@@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out; goto out;
} }
/* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode); flags, mode);
ext4_inode_resume_unlocked_dio(inode);
if (ret) if (ret)
goto out; goto out;
...@@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
return ret; return ret;
} }
/*
* Need to round down offset to be aligned with page size boundary
* for page size > block size.
*/
ioffset = round_down(offset, PAGE_SIZE);
/* Write out all dirty pages */
ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
LLONG_MAX);
if (ret)
return ret;
/* Take mutex lock */
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
/* /*
* There is no need to overlap collapse range with EOF, in which case * There is no need to overlap collapse range with EOF, in which case
* it is effectively a truncate operation * it is effectively a truncate operation
...@@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
goto out_mutex; goto out_mutex;
} }
truncate_pagecache(inode, ioffset);
/* Wait for existing dio to complete */ /* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode); ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode); inode_dio_wait(inode);
/*
* Prevent page faults from reinstantiating pages we have released from
* page cache.
*/
down_write(&EXT4_I(inode)->i_mmap_sem);
/*
* Need to round down offset to be aligned with page size boundary
* for page size > block size.
*/
ioffset = round_down(offset, PAGE_SIZE);
/*
* Write tail of the last page before removed range since it will get
* removed from the page cache below.
*/
ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
if (ret)
goto out_mmap;
/*
* Write data that will be shifted to preserve them when discarding
* page cache below. We are also protected from pages becoming dirty
* by i_mmap_sem.
*/
ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
LLONG_MAX);
if (ret)
goto out_mmap;
truncate_pagecache(inode, ioffset);
credits = ext4_writepage_trans_blocks(inode); credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
goto out_dio; goto out_mmap;
} }
down_write(&EXT4_I(inode)->i_data_sem); down_write(&EXT4_I(inode)->i_data_sem);
...@@ -5573,7 +5583,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5573,7 +5583,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
out_stop: out_stop:
ext4_journal_stop(handle); ext4_journal_stop(handle);
out_dio: out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode); ext4_inode_resume_unlocked_dio(inode);
out_mutex: out_mutex:
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
...@@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
return ret; return ret;
} }
/*
* Need to round down to align start offset to page size boundary
* for page size > block size.
*/
ioffset = round_down(offset, PAGE_SIZE);
/* Write out all dirty pages */
ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
LLONG_MAX);
if (ret)
return ret;
/* Take mutex lock */
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
/* Currently just for extent based files */ /* Currently just for extent based files */
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
...@@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
goto out_mutex; goto out_mutex;
} }
truncate_pagecache(inode, ioffset);
/* Wait for existing dio to complete */ /* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode); ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode); inode_dio_wait(inode);
/*
* Prevent page faults from reinstantiating pages we have released from
* page cache.
*/
down_write(&EXT4_I(inode)->i_mmap_sem);
/*
* Need to round down to align start offset to page size boundary
* for page size > block size.
*/
ioffset = round_down(offset, PAGE_SIZE);
/* Write out all dirty pages */
ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
LLONG_MAX);
if (ret)
goto out_mmap;
truncate_pagecache(inode, ioffset);
credits = ext4_writepage_trans_blocks(inode); credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
goto out_dio; goto out_mmap;
} }
/* Expand file to avoid data loss if there is error while shifting */ /* Expand file to avoid data loss if there is error while shifting */
...@@ -5741,7 +5753,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5741,7 +5753,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
out_stop: out_stop:
ext4_journal_stop(handle); ext4_journal_stop(handle);
out_dio: out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode); ext4_inode_resume_unlocked_dio(inode);
out_mutex: out_mutex:
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
......
...@@ -193,43 +193,35 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -193,43 +193,35 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
} }
#ifdef CONFIG_FS_DAX #ifdef CONFIG_FS_DAX
static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
{
struct inode *inode = bh->b_assoc_map->host;
/* XXX: breaks on 32-bit > 16TB. Is that even supported? */
loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
int err;
if (!uptodate)
return;
WARN_ON(!buffer_unwritten(bh));
err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
}
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
int result; int result;
handle_t *handle = NULL; handle_t *handle = NULL;
struct super_block *sb = file_inode(vma->vm_file)->i_sb; struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE; bool write = vmf->flags & FAULT_FLAG_WRITE;
if (write) { if (write) {
sb_start_pagefault(sb); sb_start_pagefault(sb);
file_update_time(vma->vm_file); file_update_time(vma->vm_file);
down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
EXT4_DATA_TRANS_BLOCKS(sb)); EXT4_DATA_TRANS_BLOCKS(sb));
} } else
down_read(&EXT4_I(inode)->i_mmap_sem);
if (IS_ERR(handle)) if (IS_ERR(handle))
result = VM_FAULT_SIGBUS; result = VM_FAULT_SIGBUS;
else else
result = __dax_fault(vma, vmf, ext4_get_block_dax, result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
ext4_end_io_unwritten);
if (write) { if (write) {
if (!IS_ERR(handle)) if (!IS_ERR(handle))
ext4_journal_stop(handle); ext4_journal_stop(handle);
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb); sb_end_pagefault(sb);
} } else
up_read(&EXT4_I(inode)->i_mmap_sem);
return result; return result;
} }
...@@ -246,44 +238,86 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, ...@@ -246,44 +238,86 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
if (write) { if (write) {
sb_start_pagefault(sb); sb_start_pagefault(sb);
file_update_time(vma->vm_file); file_update_time(vma->vm_file);
down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
ext4_chunk_trans_blocks(inode, ext4_chunk_trans_blocks(inode,
PMD_SIZE / PAGE_SIZE)); PMD_SIZE / PAGE_SIZE));
} } else
down_read(&EXT4_I(inode)->i_mmap_sem);
if (IS_ERR(handle)) if (IS_ERR(handle))
result = VM_FAULT_SIGBUS; result = VM_FAULT_SIGBUS;
else else
result = __dax_pmd_fault(vma, addr, pmd, flags, result = __dax_pmd_fault(vma, addr, pmd, flags,
ext4_get_block_dax, ext4_end_io_unwritten); ext4_dax_mmap_get_block, NULL);
if (write) { if (write) {
if (!IS_ERR(handle)) if (!IS_ERR(handle))
ext4_journal_stop(handle); ext4_journal_stop(handle);
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb); sb_end_pagefault(sb);
} } else
up_read(&EXT4_I(inode)->i_mmap_sem);
return result; return result;
} }
static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
return dax_mkwrite(vma, vmf, ext4_get_block_dax, int err;
ext4_end_io_unwritten); struct inode *inode = file_inode(vma->vm_file);
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
down_read(&EXT4_I(inode)->i_mmap_sem);
err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(inode->i_sb);
return err;
}
/*
* Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
* handler we check for races against truncate. Note that since we cycle through
* i_mmap_sem, we are sure that also any hole punching that began before we
* were called is finished by now and so if it included part of the file we
* are working on, our pte will get unmapped and the check for pte_same() in
* wp_pfn_shared() fails. Thus fault gets retried and things work out as
* desired.
*/
static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb;
int ret = VM_FAULT_NOPAGE;
loff_t size;
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
down_read(&EXT4_I(inode)->i_mmap_sem);
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
return ret;
} }
static const struct vm_operations_struct ext4_dax_vm_ops = { static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault, .fault = ext4_dax_fault,
.pmd_fault = ext4_dax_pmd_fault, .pmd_fault = ext4_dax_pmd_fault,
.page_mkwrite = ext4_dax_mkwrite, .page_mkwrite = ext4_dax_mkwrite,
.pfn_mkwrite = dax_pfn_mkwrite, .pfn_mkwrite = ext4_dax_pfn_mkwrite,
}; };
#else #else
#define ext4_dax_vm_ops ext4_file_vm_ops #define ext4_dax_vm_ops ext4_file_vm_ops
#endif #endif
static const struct vm_operations_struct ext4_file_vm_ops = { static const struct vm_operations_struct ext4_file_vm_ops = {
.fault = filemap_fault, .fault = ext4_filemap_fault,
.map_pages = filemap_map_pages, .map_pages = filemap_map_pages,
.page_mkwrite = ext4_page_mkwrite, .page_mkwrite = ext4_page_mkwrite,
}; };
......
...@@ -799,6 +799,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, ...@@ -799,6 +799,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
inode->i_gid = dir->i_gid; inode->i_gid = dir->i_gid;
} else } else
inode_init_owner(inode, dir, mode); inode_init_owner(inode, dir, mode);
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
ei->i_projid = EXT4_I(dir)->i_projid;
else
ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
err = dquot_initialize(inode); err = dquot_initialize(inode);
if (err) if (err)
goto out; goto out;
......
...@@ -995,12 +995,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, ...@@ -995,12 +995,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
*/ */
static int ext4_add_dirent_to_inline(handle_t *handle, static int ext4_add_dirent_to_inline(handle_t *handle,
struct ext4_filename *fname, struct ext4_filename *fname,
struct dentry *dentry, struct inode *dir,
struct inode *inode, struct inode *inode,
struct ext4_iloc *iloc, struct ext4_iloc *iloc,
void *inline_start, int inline_size) void *inline_start, int inline_size)
{ {
struct inode *dir = d_inode(dentry->d_parent);
int err; int err;
struct ext4_dir_entry_2 *de; struct ext4_dir_entry_2 *de;
...@@ -1245,12 +1244,11 @@ static int ext4_convert_inline_data_nolock(handle_t *handle, ...@@ -1245,12 +1244,11 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
* the new created block. * the new created block.
*/ */
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
struct dentry *dentry, struct inode *inode) struct inode *dir, struct inode *inode)
{ {
int ret, inline_size; int ret, inline_size;
void *inline_start; void *inline_start;
struct ext4_iloc iloc; struct ext4_iloc iloc;
struct inode *dir = d_inode(dentry->d_parent);
ret = ext4_get_inode_loc(dir, &iloc); ret = ext4_get_inode_loc(dir, &iloc);
if (ret) if (ret)
...@@ -1264,7 +1262,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, ...@@ -1264,7 +1262,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
EXT4_INLINE_DOTDOT_SIZE; EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc, ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
inline_start, inline_size); inline_start, inline_size);
if (ret != -ENOSPC) if (ret != -ENOSPC)
goto out; goto out;
...@@ -1285,7 +1283,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, ...@@ -1285,7 +1283,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
if (inline_size) { if (inline_size) {
inline_start = ext4_get_inline_xattr_pos(dir, &iloc); inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
ret = ext4_add_dirent_to_inline(handle, fname, dentry, ret = ext4_add_dirent_to_inline(handle, fname, dir,
inode, &iloc, inline_start, inode, &iloc, inline_start,
inline_size); inline_size);
......
This diff is collapsed.
This diff is collapsed.
...@@ -273,7 +273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, ...@@ -273,7 +273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
struct ext4_filename *fname, struct ext4_filename *fname,
struct ext4_dir_entry_2 **res_dir); struct ext4_dir_entry_2 **res_dir);
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
struct dentry *dentry, struct inode *inode); struct inode *dir, struct inode *inode);
/* checksumming functions */ /* checksumming functions */
void initialize_dirent_tail(struct ext4_dir_entry_tail *t, void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
...@@ -1928,10 +1928,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, ...@@ -1928,10 +1928,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
* directory, and adds the dentry to the indexed directory. * directory, and adds the dentry to the indexed directory.
*/ */
static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
struct dentry *dentry, struct inode *dir,
struct inode *inode, struct buffer_head *bh) struct inode *inode, struct buffer_head *bh)
{ {
struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh2; struct buffer_head *bh2;
struct dx_root *root; struct dx_root *root;
struct dx_frame frames[2], *frame; struct dx_frame frames[2], *frame;
...@@ -2086,8 +2085,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, ...@@ -2086,8 +2085,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return retval; return retval;
if (ext4_has_inline_data(dir)) { if (ext4_has_inline_data(dir)) {
retval = ext4_try_add_inline_entry(handle, &fname, retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
dentry, inode);
if (retval < 0) if (retval < 0)
goto out; goto out;
if (retval == 1) { if (retval == 1) {
...@@ -2097,7 +2095,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, ...@@ -2097,7 +2095,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
} }
if (is_dx(dir)) { if (is_dx(dir)) {
retval = ext4_dx_add_entry(handle, &fname, dentry, inode); retval = ext4_dx_add_entry(handle, &fname, dir, inode);
if (!retval || (retval != ERR_BAD_DX_DIR)) if (!retval || (retval != ERR_BAD_DX_DIR))
goto out; goto out;
ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
...@@ -2119,7 +2117,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, ...@@ -2119,7 +2117,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
if (blocks == 1 && !dx_fallback && if (blocks == 1 && !dx_fallback &&
ext4_has_feature_dir_index(sb)) { ext4_has_feature_dir_index(sb)) {
retval = make_indexed_dir(handle, &fname, dentry, retval = make_indexed_dir(handle, &fname, dir,
inode, bh); inode, bh);
bh = NULL; /* make_indexed_dir releases bh */ bh = NULL; /* make_indexed_dir releases bh */
goto out; goto out;
...@@ -2154,12 +2152,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, ...@@ -2154,12 +2152,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
* Returns 0 for success, or a negative error value * Returns 0 for success, or a negative error value
*/ */
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
struct dentry *dentry, struct inode *inode) struct inode *dir, struct inode *inode)
{ {
struct dx_frame frames[2], *frame; struct dx_frame frames[2], *frame;
struct dx_entry *entries, *at; struct dx_entry *entries, *at;
struct buffer_head *bh; struct buffer_head *bh;
struct inode *dir = d_inode(dentry->d_parent);
struct super_block *sb = dir->i_sb; struct super_block *sb = dir->i_sb;
struct ext4_dir_entry_2 *de; struct ext4_dir_entry_2 *de;
int err; int err;
...@@ -3212,6 +3209,12 @@ static int ext4_link(struct dentry *old_dentry, ...@@ -3212,6 +3209,12 @@ static int ext4_link(struct dentry *old_dentry,
if (ext4_encrypted_inode(dir) && if (ext4_encrypted_inode(dir) &&
!ext4_is_child_context_consistent_with_parent(dir, inode)) !ext4_is_child_context_consistent_with_parent(dir, inode))
return -EPERM; return -EPERM;
if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
(!projid_eq(EXT4_I(dir)->i_projid,
EXT4_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
err = dquot_initialize(dir); err = dquot_initialize(dir);
if (err) if (err)
return err; return err;
...@@ -3492,6 +3495,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -3492,6 +3495,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
int credits; int credits;
u8 old_file_type; u8 old_file_type;
if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
(!projid_eq(EXT4_I(new_dir)->i_projid,
EXT4_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
retval = dquot_initialize(old.dir); retval = dquot_initialize(old.dir);
if (retval) if (retval)
return retval; return retval;
...@@ -3701,6 +3709,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -3701,6 +3709,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
new.inode))) new.inode)))
return -EPERM; return -EPERM;
if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
!projid_eq(EXT4_I(new_dir)->i_projid,
EXT4_I(old_dentry->d_inode)->i_projid)) ||
(ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
!projid_eq(EXT4_I(old_dir)->i_projid,
EXT4_I(new_dentry->d_inode)->i_projid)))
return -EXDEV;
retval = dquot_initialize(old.dir); retval = dquot_initialize(old.dir);
if (retval) if (retval)
return retval; return retval;
......
...@@ -80,6 +80,36 @@ static void ext4_destroy_lazyinit_thread(void); ...@@ -80,6 +80,36 @@ static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb); static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void); static void ext4_clear_request_list(void);
/*
* Lock ordering
*
* Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
* i_mmap_rwsem (inode->i_mmap_rwsem)!
*
* page fault path:
* mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
* page lock -> i_data_sem (rw)
*
* buffered write path:
* sb_start_write -> i_mutex -> mmap_sem
* sb_start_write -> i_mutex -> transaction start -> page lock ->
* i_data_sem (rw)
*
* truncate:
* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
* i_mmap_rwsem (w) -> page lock
* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
* transaction start -> i_data_sem (rw)
*
* direct IO:
* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
* sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
* transaction start -> i_data_sem (rw)
*
* writepages:
* transaction start -> page lock(s) -> i_data_sem (rw)
*/
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static struct file_system_type ext2_fs_type = { static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
...@@ -958,6 +988,7 @@ static void init_once(void *foo) ...@@ -958,6 +988,7 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem); init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem); init_rwsem(&ei->i_data_sem);
init_rwsem(&ei->i_mmap_sem);
inode_init_once(&ei->vfs_inode); inode_init_once(&ei->vfs_inode);
} }
...@@ -1066,8 +1097,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, ...@@ -1066,8 +1097,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
} }
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") static char *quotatypes[] = INITQFNAMES;
#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) #define QTYPE2NAME(t) (quotatypes[t])
static int ext4_write_dquot(struct dquot *dquot); static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot); static int ext4_acquire_dquot(struct dquot *dquot);
...@@ -1100,6 +1131,7 @@ static const struct dquot_operations ext4_quota_operations = { ...@@ -1100,6 +1131,7 @@ static const struct dquot_operations ext4_quota_operations = {
.write_info = ext4_write_info, .write_info = ext4_write_info,
.alloc_dquot = dquot_alloc, .alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy, .destroy_dquot = dquot_destroy,
.get_projid = ext4_get_projid,
}; };
static const struct quotactl_ops ext4_qctl_operations = { static const struct quotactl_ops ext4_qctl_operations = {
...@@ -2526,6 +2558,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) ...@@ -2526,6 +2558,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
"without CONFIG_QUOTA"); "without CONFIG_QUOTA");
return 0; return 0;
} }
if (ext4_has_feature_project(sb) && !readonly) {
ext4_msg(sb, KERN_ERR,
"Filesystem with project quota feature cannot be mounted RDWR "
"without CONFIG_QUOTA");
return 0;
}
#endif /* CONFIG_QUOTA */ #endif /* CONFIG_QUOTA */
return 1; return 1;
} }
...@@ -3654,7 +3692,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -3654,7 +3692,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &dquot_quotactl_sysfile_ops; sb->s_qcop = &dquot_quotactl_sysfile_ops;
else else
sb->s_qcop = &ext4_qctl_operations; sb->s_qcop = &ext4_qctl_operations;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif #endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
...@@ -4790,6 +4828,48 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ...@@ -4790,6 +4828,48 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
return err; return err;
} }
#ifdef CONFIG_QUOTA
/*
 * Clamp the statfs figures in @buf to the quota limits of project @projid.
 *
 * Looks up the project's dquot on @sb and, while holding dq_data_lock,
 * overrides the block and inode totals in @buf whenever a non-zero limit is
 * smaller than the value already stored there. For each resource the soft
 * limit is used when it is non-zero, otherwise the hard limit is used.
 *
 * Returns 0 on success, or the error carried by the pointer that dqget()
 * returned when the lookup fails (in which case @buf is left untouched).
 */
static int ext4_statfs_project(struct super_block *sb,
kprojid_t projid, struct kstatfs *buf)
{
struct kqid qid;
struct dquot *dquot;
u64 limit;
u64 curblock;
/* Translate the project ID into a quota ID and fetch its dquot. */
qid = make_kqid_projid(projid);
dquot = dqget(sb, qid);
if (IS_ERR(dquot))
return PTR_ERR(dquot);
/* The dq_dqb fields below are all read under dq_data_lock. */
spin_lock(&dq_data_lock);
/*
 * Block limit: soft limit if set, else hard limit, scaled down by the
 * filesystem block size (presumably converting bytes to blocks -- the
 * units of dqb_b*limit are not visible here; confirm against quota.h).
 */
limit = (dquot->dq_dqb.dqb_bsoftlimit ?
dquot->dq_dqb.dqb_bsoftlimit :
dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
/* Only ever shrink the reported size; a zero limit means "no clamp". */
if (limit && buf->f_blocks > limit) {
curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
buf->f_blocks = limit;
/* Free space is what remains below the limit, never negative. */
buf->f_bfree = buf->f_bavail =
(buf->f_blocks > curblock) ?
(buf->f_blocks - curblock) : 0;
}
/* Inode limit: same soft-then-hard selection, no scaling applied. */
limit = dquot->dq_dqb.dqb_isoftlimit ?
dquot->dq_dqb.dqb_isoftlimit :
dquot->dq_dqb.dqb_ihardlimit;
if (limit && buf->f_files > limit) {
buf->f_files = limit;
/* Free inodes are what remains below the limit, floored at zero. */
buf->f_ffree =
(buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
(buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
}
spin_unlock(&dq_data_lock);
/* Drop the reference taken by dqget(). */
dqput(dquot);
return 0;
}
#endif
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{ {
struct super_block *sb = dentry->d_sb; struct super_block *sb = dentry->d_sb;
...@@ -4822,6 +4902,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -4822,6 +4902,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
#ifdef CONFIG_QUOTA
if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
sb_has_quota_limits_enabled(sb, PRJQUOTA))
ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
#endif
return 0; return 0;
} }
...@@ -4986,7 +5071,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, ...@@ -4986,7 +5071,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
struct inode *qf_inode; struct inode *qf_inode;
unsigned long qf_inums[EXT4_MAXQUOTAS] = { unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
}; };
BUG_ON(!ext4_has_feature_quota(sb)); BUG_ON(!ext4_has_feature_quota(sb));
...@@ -5014,7 +5100,8 @@ static int ext4_enable_quotas(struct super_block *sb) ...@@ -5014,7 +5100,8 @@ static int ext4_enable_quotas(struct super_block *sb)
int type, err = 0; int type, err = 0;
unsigned long qf_inums[EXT4_MAXQUOTAS] = { unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
}; };
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
......
...@@ -10,8 +10,10 @@ ...@@ -10,8 +10,10 @@
*/ */
static inline void ext4_truncate_failed_write(struct inode *inode) static inline void ext4_truncate_failed_write(struct inode *inode)
{ {
down_write(&EXT4_I(inode)->i_mmap_sem);
truncate_inode_pages(inode->i_mapping, inode->i_size); truncate_inode_pages(inode->i_mapping, inode->i_size);
ext4_truncate(inode); ext4_truncate(inode);
up_write(&EXT4_I(inode)->i_mmap_sem);
} }
/* /*
......
...@@ -43,7 +43,7 @@ struct extent_status; ...@@ -43,7 +43,7 @@ struct extent_status;
{ EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \
{ EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \
{ EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \
{ EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }) { EXT4_GET_BLOCKS_ZERO, "ZERO" })
#define show_mflags(flags) __print_flags(flags, "", \ #define show_mflags(flags) __print_flags(flags, "", \
{ EXT4_MAP_NEW, "N" }, \ { EXT4_MAP_NEW, "N" }, \
......
...@@ -2,8 +2,11 @@ ...@@ -2,8 +2,11 @@
#define _UAPI_LINUX_FS_H #define _UAPI_LINUX_FS_H
/* /*
* This file has definitions for some important file table * This file has definitions for some important file table structures
* structures etc. * and constants and structures used by various generic file system
* ioctl's. Please do not make any changes in this file before
* sending patches for review to linux-fsdevel@vger.kernel.org and
* linux-api@vger.kernel.org.
*/ */
#include <linux/limits.h> #include <linux/limits.h>
...@@ -246,6 +249,23 @@ struct fsxattr { ...@@ -246,6 +249,23 @@ struct fsxattr {
/* /*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
*
* Note: for historical reasons, these flags were originally used and
* defined for use by ext2/ext3, and then other file systems started
* using these flags so they wouldn't need to write their own version
* of chattr/lsattr (which was shipped as part of e2fsprogs). You
* should think twice before trying to use these flags in new
* contexts, or trying to assign these flags, since they are used both
* as the UAPI and the on-disk encoding for ext2/3/4. Also, we are
* almost out of 32-bit flags. :-)
*
* We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
* XFS to the generic FS level interface. This uses a structure that
* has padding and hence has more room to grow, so it may be more
* appropriate for many new use cases.
*
* Please do not change these flags or interfaces before checking with
* linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
*/ */
#define FS_SECRM_FL 0x00000001 /* Secure deletion */ #define FS_SECRM_FL 0x00000001 /* Secure deletion */
#define FS_UNRM_FL 0x00000002 /* Undelete */ #define FS_UNRM_FL 0x00000002 /* Undelete */
...@@ -259,8 +279,8 @@ struct fsxattr { ...@@ -259,8 +279,8 @@ struct fsxattr {
#define FS_DIRTY_FL 0x00000100 #define FS_DIRTY_FL 0x00000100
#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
#define FS_NOCOMP_FL 0x00000400 /* Don't compress */ #define FS_NOCOMP_FL 0x00000400 /* Don't compress */
#define FS_ECOMPR_FL 0x00000800 /* Compression error */
/* End compression flags --- maybe not all used */ /* End compression flags --- maybe not all used */
#define FS_ENCRYPT_FL 0x00000800 /* Encrypted file */
#define FS_BTREE_FL 0x00001000 /* btree format dir */ #define FS_BTREE_FL 0x00001000 /* btree format dir */
#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ #define FS_INDEX_FL 0x00001000 /* hash-indexed directory */
#define FS_IMAGIC_FL 0x00002000 /* AFS directory */ #define FS_IMAGIC_FL 0x00002000 /* AFS directory */
...@@ -268,9 +288,12 @@ struct fsxattr { ...@@ -268,9 +288,12 @@ struct fsxattr {
#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */
#define FS_EXTENT_FL 0x00080000 /* Extents */ #define FS_EXTENT_FL 0x00080000 /* Extents */
#define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */
#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment