Commit ef7f3835 authored by Kalpak Shah, committed by Theodore Ts'o

ext4: Add nanosecond timestamps

This patch adds nanosecond timestamps for ext4. This involves adding
*time_extra fields to the ext4_inode to extend the timestamps to
64-bits.  Creation time is also added by this patch.

These extended fields will fit into an inode if the filesystem was
formatted with large inodes (-I 256 or larger) and there are currently
no EAs consuming all of the available space. For new inodes we always
reserve enough space for the kernel's known extended fields, but for
inodes created with an old kernel this might not have been the case. So
this patch also adds the EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature
flag (ro-compat so that older kernels can't create inodes with a smaller
extra_isize), which indicates whether the fields fitting inside
s_min_extra_isize are available or not.  If the expansion of inodes is
unsuccessful then this feature will be disabled.  This feature is only
enabled if requested by the sysadmin.

None of the extended inode fields is critical for correct filesystem
operation.
Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Kalpak Shah <kalpak@clusterfs.com>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent 0f49d5d0
......@@ -563,7 +563,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
inode->i_ino = ino;
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
ext4_current_time(inode);
memset(ei->i_data, 0, sizeof(ei->i_data));
ei->i_dir_start_lookup = 0;
......@@ -595,9 +596,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
spin_unlock(&sbi->s_next_gen_lock);
ei->i_state = EXT4_STATE_NEW;
ei->i_extra_isize =
(EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ?
sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0;
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
......
......@@ -726,7 +726,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
/* We are done with atomic stuff, now do the rest of housekeeping */
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
/* had we spliced it onto indirect block? */
......@@ -2375,7 +2375,7 @@ void ext4_truncate(struct inode *inode)
ext4_discard_reservation(inode);
mutex_unlock(&ei->truncate_mutex);
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
/*
......@@ -2629,10 +2629,6 @@ void ext4_read_inode(struct inode * inode)
}
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
ei->i_state = 0;
ei->i_dir_start_lookup = 0;
......@@ -2710,6 +2706,11 @@ void ext4_read_inode(struct inode * inode)
} else
ei->i_extra_isize = 0;
EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
......@@ -2791,9 +2792,12 @@ static int ext4_do_update_inode(handle_t *handle,
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le32(ei->i_disksize);
raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
......
......@@ -97,7 +97,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
ei->i_flags = flags;
ext4_set_inode_flags(inode);
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
......@@ -134,7 +134,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
return PTR_ERR(handle);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
inode->i_generation = generation;
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
......
......@@ -1295,7 +1295,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
dir->i_version++;
ext4_mark_inode_dirty(handle, dir);
......@@ -2056,7 +2056,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
* recovery. */
inode->i_size = 0;
ext4_orphan_add(handle, inode);
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
drop_nlink(dir);
ext4_update_dx_flag(dir);
......@@ -2106,13 +2106,13 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto end_unlink;
dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
drop_nlink(inode);
if (!inode->i_nlink)
ext4_orphan_add(handle, inode);
inode->i_ctime = dir->i_ctime;
inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
retval = 0;
......@@ -2203,7 +2203,7 @@ static int ext4_link (struct dentry * old_dentry,
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
inc_nlink(inode);
atomic_inc(&inode->i_count);
......@@ -2305,7 +2305,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
old_inode->i_ctime = CURRENT_TIME_SEC;
old_inode->i_ctime = ext4_current_time(old_inode);
ext4_mark_inode_dirty(handle, old_inode);
/*
......@@ -2338,9 +2338,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
if (new_inode) {
drop_nlink(new_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
new_inode->i_ctime = ext4_current_time(new_inode);
}
old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
ext4_update_dx_flag(old_dir);
if (dir_bh) {
BUFFER_TRACE(dir_bh, "get_write_access");
......
......@@ -1651,6 +1651,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_inode_size);
goto failed_mount;
}
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
}
sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
le32_to_cpu(es->s_log_frag_size);
......@@ -1874,6 +1876,32 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
}
ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
/* determine the minimum size of new large inodes, if present */
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
if (sbi->s_want_extra_isize <
le16_to_cpu(es->s_want_extra_isize))
sbi->s_want_extra_isize =
le16_to_cpu(es->s_want_extra_isize);
if (sbi->s_want_extra_isize <
le16_to_cpu(es->s_min_extra_isize))
sbi->s_want_extra_isize =
le16_to_cpu(es->s_min_extra_isize);
}
}
/* Check if enough inode space is available */
if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
sbi->s_inode_size) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
printk(KERN_INFO "EXT4-fs: required extra inode space not"
"available.\n");
}
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
......
......@@ -1013,7 +1013,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
}
if (!error) {
ext4_xattr_update_super_block(handle, inode->i_sb);
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/*
* The bh is consumed by ext4_mark_iloc_dirty, even with
......
......@@ -288,7 +288,7 @@ struct ext4_inode {
__le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size; /* Size in bytes */
__le32 i_atime; /* Access time */
__le32 i_ctime; /* Creation time */
__le32 i_ctime; /* Inode Change time */
__le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
......@@ -337,10 +337,85 @@ struct ext4_inode {
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_pad1;
__le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
__le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
__le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
__le32 i_crtime; /* File Creation time */
__le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
};
#define i_size_high i_dir_acl
/*
 * Bit layout of the 32-bit *_extra timestamp words (nsec << 2 | epoch):
 * the low EXT4_EPOCH_BITS bits carry the epoch, every bit above them
 * carries the nanoseconds.
 */
#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
/*
* Extended fields will fit into an inode if the filesystem was formatted
* with large inodes (-I 256 or larger) and there are not currently any EAs
* consuming all of the available space. For new inodes we always reserve
* enough space for the kernel's known extended fields, but for inodes
* created with an old kernel this might not have been the case. None of
* the extended inode fields is critical for correct filesystem operation.
* This macro checks if a certain field fits in the inode. Note that
* inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
*/
/*
 * EXT4_FITS_IN_INODE - test whether an extended on-disk field is usable.
 * @ext4_inode: pointer to the on-disk inode (used for its type only)
 * @einode:     in-memory inode info that supplies i_extra_isize
 * @field:      name of the on-disk field being tested
 *
 * Evaluates to non-zero when @field ends at or before
 * EXT4_GOOD_OLD_INODE_SIZE + i_extra_isize.
 *
 * The final line deliberately has no continuation backslash, so the
 * definition cannot absorb whatever line follows it.
 */
#define EXT4_FITS_IN_INODE(ext4_inode, einode, field)	\
	((offsetof(typeof(*(ext4_inode)), field) +	\
	  sizeof((ext4_inode)->field))			\
	<= (EXT4_GOOD_OLD_INODE_SIZE +			\
	    (einode)->i_extra_isize))
static inline __le32 ext4_encode_extra_time(struct timespec *time)
{
return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
time->tv_sec >> 32 : 0) |
((time->tv_nsec << 2) & EXT4_NSEC_MASK));
}
static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
{
if (sizeof(time->tv_sec) > 4)
time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
<< 32;
time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
}
/*
 * Copy the VFS inode timestamp @xtime into the on-disk inode.  The legacy
 * 32-bit seconds field is always written; the matching *_extra word is
 * written only when it fits inside this inode.
 */
#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)			\
do {									\
	(raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec);	\
	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode),		\
			       xtime ## _extra)) {			\
		(raw_inode)->xtime ## _extra =				\
			ext4_encode_extra_time(&(inode)->xtime);	\
	}								\
} while (0)
/*
 * Copy the ext4-private timestamp @xtime from @einode into the on-disk
 * inode.  Both the seconds field and its *_extra word live in the extended
 * area, so each is written only when it fits inside this inode.
 */
#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode)			\
do {									\
	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) {		\
		(raw_inode)->xtime =					\
			cpu_to_le32((einode)->xtime.tv_sec);		\
	}								\
	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) {	\
		(raw_inode)->xtime ## _extra =				\
			ext4_encode_extra_time(&(einode)->xtime);	\
	}								\
} while (0)
/*
 * Load the VFS inode timestamp @xtime from the on-disk inode.  The legacy
 * 32-bit seconds field is sign-extended into tv_sec.  When the matching
 * *_extra word fits inside this inode it supplies the epoch and the
 * nanoseconds; otherwise tv_nsec is explicitly zeroed so it never keeps a
 * stale value.
 */
#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode)			\
do {									\
	(inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
		ext4_decode_extra_time(&(inode)->xtime,			\
				       (raw_inode)->xtime ## _extra);	\
	else								\
		(inode)->xtime.tv_nsec = 0;				\
} while (0)
/*
 * Load the ext4-private timestamp @xtime into @einode from the on-disk
 * inode.  Both the seconds field and its *_extra word live in the extended
 * area; whichever one does not fit inside this inode is reported as zero
 * instead of leaving the in-memory field with its previous contents.
 */
#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)			\
do {									\
	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))		\
		(einode)->xtime.tv_sec =				\
			(signed)le32_to_cpu((raw_inode)->xtime);	\
	else								\
		(einode)->xtime.tv_sec = 0;				\
	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))	\
		ext4_decode_extra_time(&(einode)->xtime,		\
				       (raw_inode)->xtime ## _extra);	\
	else								\
		(einode)->xtime.tv_nsec = 0;				\
} while (0)
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1
#define i_frag osd2.linux2.l_i_frag
......@@ -539,6 +614,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
return container_of(inode, struct ext4_inode_info, vfs_inode);
}
/*
 * Return the timestamp to stamp onto @inode right now: current_fs_time()
 * for the inode's superblock when its s_time_gran is below one second,
 * CURRENT_TIME_SEC otherwise.
 */
static inline struct timespec ext4_current_time(struct inode *inode)
{
	if (inode->i_sb->s_time_gran < NSEC_PER_SEC)
		return current_fs_time(inode->i_sb);

	return CURRENT_TIME_SEC;
}
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
......@@ -609,6 +691,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
......@@ -626,6 +709,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
EXT4_FEATURE_INCOMPAT_64BIT)
/*
 * Bitwise OR of the RO_COMPAT feature flags named below.
 * NOTE(review): presumably the set of read-only-compatible features this
 * driver accepts at mount time -- confirm against the feature check that
 * consumes it.
 */
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
/*
......
......@@ -153,6 +153,11 @@ struct ext4_inode_info {
unsigned long i_ext_generation;
struct ext4_ext_cache i_cached_extent;
/*
* File creation time. Its function is same as that of
* struct timespec i_{a,c,m}time in the generic inode.
*/
struct timespec i_crtime;
};
#endif /* _LINUX_EXT4_FS_I */
......@@ -81,6 +81,7 @@ struct ext4_sb_info {
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
#ifdef EXTENTS_STATS
/* ext4 extents stats */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment