Commit 0732f877 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  jbd2: clean up jbd2_journal_try_to_free_buffers()
  ext4: Don't update ctime for non-extent-mapped inodes
  ext4: Fix up whitespace issues in fs/ext4/inode.c
  ext4: Fix 64-bit block type problem on 32-bit platforms
  ext4: teach the inode allocator to use a goal inode number
  ext4: Use a hash of the topdir directory name for the Orlov parent group
  ext4: document the "abort" mount option
  ext4: move the abort flag from s_mount_opts to s_mount_flags
  ext4: update the s_last_mounted field in the superblock
  ext4: change s_mount_opt to be an unsigned int
  ext4: online defrag -- Add EXT4_IOC_MOVE_EXT ioctl
  ext4: avoid unnecessary spinlock in critical POSIX ACL path
  ext3: avoid unnecessary spinlock in critical POSIX ACL path
  ext4: convert instrumentation from markers to tracepoints
  jbd2: convert instrumentation from markers to tracepoints
parents 15fc204a 536fc240
...@@ -79,3 +79,13 @@ Description: ...@@ -79,3 +79,13 @@ Description:
This file is read-only and shows the number of This file is read-only and shows the number of
kilobytes of data that have been written to this kilobytes of data that have been written to this
filesystem since it was mounted. filesystem since it was mounted.
What: /sys/fs/ext4/<disk>/inode_goal
Date: June 2008
Contact: "Theodore Ts'o" <tytso@mit.edu>
Description:
Tuning parameter which (if non-zero) controls the goal
inode used by the inode allocator in p0reference to
all other allocation hueristics. This is intended for
debugging use only, and should be 0 on production
systems.
...@@ -235,6 +235,10 @@ minixdf Make 'df' act like Minix. ...@@ -235,6 +235,10 @@ minixdf Make 'df' act like Minix.
debug Extra debugging information is sent to syslog. debug Extra debugging information is sent to syslog.
abort Simulate the effects of calling ext4_abort() for
debugging purposes. This is normally used while
remounting a filesystem which is already mounted.
errors=remount-ro Remount the filesystem read-only on an error. errors=remount-ro Remount the filesystem read-only on an error.
errors=continue Keep going on a filesystem error. errors=continue Keep going on a filesystem error.
errors=panic Panic and halt the machine if an error occurs. errors=panic Panic and halt the machine if an error occurs.
......
...@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o ...@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o migrate.o mballoc.o block_validity.o ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
......
...@@ -352,6 +352,7 @@ struct ext4_new_group_data { ...@@ -352,6 +352,7 @@ struct ext4_new_group_data {
/* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
/* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
/* /*
* ioctl commands in 32 bit emulation * ioctl commands in 32 bit emulation
...@@ -447,6 +448,15 @@ struct ext4_inode { ...@@ -447,6 +448,15 @@ struct ext4_inode {
__le32 i_version_hi; /* high 32 bits for 64-bit version */ __le32 i_version_hi; /* high 32 bits for 64-bit version */
}; };
struct move_extent {
__u32 reserved; /* should be zero */
__u32 donor_fd; /* donor file descriptor */
__u64 orig_start; /* logical start offset in block for orig */
__u64 donor_start; /* logical start offset in block for donor */
__u64 len; /* block length to be moved */
__u64 moved_len; /* moved block length */
};
#define MAX_DEFRAG_SIZE ((1UL<<31) - 1)
#define EXT4_EPOCH_BITS 2 #define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
...@@ -674,7 +684,6 @@ struct ext4_inode_info { ...@@ -674,7 +684,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
...@@ -696,17 +705,10 @@ struct ext4_inode_info { ...@@ -696,17 +705,10 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
#define set_opt(o, opt) o |= EXT4_MOUNT_##opt #define set_opt(o, opt) o |= EXT4_MOUNT_##opt
#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ #define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
EXT4_MOUNT_##opt) EXT4_MOUNT_##opt)
#else
#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD
#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT
#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS
#endif
#define ext4_set_bit ext2_set_bit #define ext4_set_bit ext2_set_bit
#define ext4_set_bit_atomic ext2_set_bit_atomic #define ext4_set_bit_atomic ext2_set_bit_atomic
...@@ -824,6 +826,13 @@ struct ext4_super_block { ...@@ -824,6 +826,13 @@ struct ext4_super_block {
}; };
#ifdef __KERNEL__ #ifdef __KERNEL__
/*
* run-time mount flags
*/
#define EXT4_MF_MNTDIR_SAMPLED 0x0001
#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
/* /*
* fourth extended-fs super-block data in memory * fourth extended-fs super-block data in memory
*/ */
...@@ -842,7 +851,8 @@ struct ext4_sb_info { ...@@ -842,7 +851,8 @@ struct ext4_sb_info {
struct buffer_head * s_sbh; /* Buffer containing the super block */ struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc; struct buffer_head **s_group_desc;
unsigned long s_mount_opt; unsigned int s_mount_opt;
unsigned int s_mount_flags;
ext4_fsblk_t s_sb_block; ext4_fsblk_t s_sb_block;
uid_t s_resuid; uid_t s_resuid;
gid_t s_resgid; gid_t s_resgid;
...@@ -853,6 +863,7 @@ struct ext4_sb_info { ...@@ -853,6 +863,7 @@ struct ext4_sb_info {
int s_inode_size; int s_inode_size;
int s_first_ino; int s_first_ino;
unsigned int s_inode_readahead_blks; unsigned int s_inode_readahead_blks;
unsigned int s_inode_goal;
spinlock_t s_next_gen_lock; spinlock_t s_next_gen_lock;
u32 s_next_generation; u32 s_next_generation;
u32 s_hash_seed[4]; u32 s_hash_seed[4];
...@@ -1305,7 +1316,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct ...@@ -1305,7 +1316,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
dx_hash_info *hinfo); dx_hash_info *hinfo);
/* ialloc.c */ /* ialloc.c */
extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
const struct qstr *qstr, __u32 goal);
extern void ext4_free_inode(handle_t *, struct inode *); extern void ext4_free_inode(handle_t *, struct inode *);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *); extern unsigned long ext4_count_free_inodes(struct super_block *);
...@@ -1329,7 +1341,7 @@ extern void ext4_discard_preallocations(struct inode *); ...@@ -1329,7 +1341,7 @@ extern void ext4_discard_preallocations(struct inode *);
extern int __init init_ext4_mballoc(void); extern int __init init_ext4_mballoc(void);
extern void exit_ext4_mballoc(void); extern void exit_ext4_mballoc(void);
extern void ext4_mb_free_blocks(handle_t *, struct inode *, extern void ext4_mb_free_blocks(handle_t *, struct inode *,
unsigned long, unsigned long, int, unsigned long *); ext4_fsblk_t, unsigned long, int, unsigned long *);
extern int ext4_mb_add_groupinfo(struct super_block *sb, extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc); ext4_group_t i, struct ext4_group_desc *desc);
extern void ext4_mb_update_group_info(struct ext4_group_info *grp, extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
...@@ -1647,6 +1659,11 @@ extern int ext4_get_blocks(handle_t *handle, struct inode *inode, ...@@ -1647,6 +1659,11 @@ extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int flags); struct buffer_head *bh, int flags);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len); __u64 start, __u64 len);
/* move_extent.c */
extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 start_orig, __u64 start_donor,
__u64 len, __u64 *moved_len);
/* /*
* Add new method to test wether block and inode bitmaps are properly * Add new method to test wether block and inode bitmaps are properly
......
...@@ -221,12 +221,16 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) ...@@ -221,12 +221,16 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
} }
extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num, int num,
struct ext4_ext_path *path); struct ext4_ext_path *path);
extern int ext4_can_extents_be_merged(struct inode *inode,
struct ext4_extent *ex1,
struct ext4_extent *ex2);
extern int ext4_ext_try_to_merge(struct inode *inode, extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path, struct ext4_ext_path *path,
struct ext4_extent *); struct ext4_extent *);
......
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
* ext_pblock: * ext_pblock:
* combine low and high parts of physical block number into ext4_fsblk_t * combine low and high parts of physical block number into ext4_fsblk_t
*/ */
static ext4_fsblk_t ext_pblock(struct ext4_extent *ex) ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
{ {
ext4_fsblk_t block; ext4_fsblk_t block;
...@@ -1417,7 +1417,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, ...@@ -1417,7 +1417,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
return err; return err;
} }
static int int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
struct ext4_extent *ex2) struct ext4_extent *ex2)
{ {
......
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
#include <linux/time.h> #include <linux/time.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/jbd2.h> #include <linux/jbd2.h>
#include <linux/mount.h>
#include <linux/path.h>
#include "ext4.h" #include "ext4.h"
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "xattr.h" #include "xattr.h"
...@@ -145,6 +147,38 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -145,6 +147,38 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
return 0; return 0;
} }
static int ext4_file_open(struct inode * inode, struct file * filp)
{
struct super_block *sb = inode->i_sb;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct vfsmount *mnt = filp->f_path.mnt;
struct path path;
char buf[64], *cp;
if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
!(sb->s_flags & MS_RDONLY))) {
sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
/*
* Sample where the filesystem has been mounted and
* store it in the superblock for sysadmin convenience
* when trying to sort through large numbers of block
* devices or filesystem images.
*/
memset(buf, 0, sizeof(buf));
path.mnt = mnt->mnt_parent;
path.dentry = mnt->mnt_mountpoint;
path_get(&path);
cp = d_path(&path, buf, sizeof(buf));
path_put(&path);
if (!IS_ERR(cp)) {
memcpy(sbi->s_es->s_last_mounted, cp,
sizeof(sbi->s_es->s_last_mounted));
sb->s_dirt = 1;
}
}
return generic_file_open(inode, filp);
}
const struct file_operations ext4_file_operations = { const struct file_operations ext4_file_operations = {
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
.read = do_sync_read, .read = do_sync_read,
...@@ -156,7 +190,7 @@ const struct file_operations ext4_file_operations = { ...@@ -156,7 +190,7 @@ const struct file_operations ext4_file_operations = {
.compat_ioctl = ext4_compat_ioctl, .compat_ioctl = ext4_compat_ioctl,
#endif #endif
.mmap = ext4_file_mmap, .mmap = ext4_file_mmap,
.open = generic_file_open, .open = ext4_file_open,
.release = ext4_release_file, .release = ext4_release_file,
.fsync = ext4_sync_file, .fsync = ext4_sync_file,
.splice_read = generic_file_splice_read, .splice_read = generic_file_splice_read,
......
...@@ -28,10 +28,12 @@ ...@@ -28,10 +28,12 @@
#include <linux/writeback.h> #include <linux/writeback.h>
#include <linux/jbd2.h> #include <linux/jbd2.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/marker.h>
#include "ext4.h" #include "ext4.h"
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include <trace/events/ext4.h>
/* /*
* akpm: A new design for ext4_sync_file(). * akpm: A new design for ext4_sync_file().
* *
...@@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) ...@@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
J_ASSERT(ext4_journal_current_handle() == NULL); J_ASSERT(ext4_journal_current_handle() == NULL);
trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", trace_ext4_sync_file(file, dentry, datasync);
inode->i_sb->s_id, datasync, inode->i_ino,
dentry->d_parent->d_inode->i_ino);
/* /*
* data=writeback: * data=writeback:
......
...@@ -23,11 +23,14 @@ ...@@ -23,11 +23,14 @@
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <asm/byteorder.h> #include <asm/byteorder.h>
#include "ext4.h" #include "ext4.h"
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "xattr.h" #include "xattr.h"
#include "acl.h" #include "acl.h"
#include <trace/events/ext4.h>
/* /*
* ialloc.c contains the inodes allocation and deallocation routines * ialloc.c contains the inodes allocation and deallocation routines
*/ */
...@@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) ...@@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
ino = inode->i_ino; ino = inode->i_ino;
ext4_debug("freeing inode %lu\n", ino); ext4_debug("freeing inode %lu\n", ino);
trace_mark(ext4_free_inode, trace_ext4_free_inode(inode);
"dev %s ino %lu mode %d uid %lu gid %lu bocks %llu",
sb->s_id, inode->i_ino, inode->i_mode,
(unsigned long) inode->i_uid, (unsigned long) inode->i_gid,
(unsigned long long) inode->i_blocks);
/* /*
* Note: we must free any quota before locking the superblock, * Note: we must free any quota before locking the superblock,
...@@ -471,7 +470,8 @@ void get_orlov_stats(struct super_block *sb, ext4_group_t g, ...@@ -471,7 +470,8 @@ void get_orlov_stats(struct super_block *sb, ext4_group_t g,
*/ */
static int find_group_orlov(struct super_block *sb, struct inode *parent, static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4_group_t *group, int mode) ext4_group_t *group, int mode,
const struct qstr *qstr)
{ {
ext4_group_t parent_group = EXT4_I(parent)->i_block_group; ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
...@@ -486,6 +486,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, ...@@ -486,6 +486,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
struct ext4_group_desc *desc; struct ext4_group_desc *desc;
struct orlov_stats stats; struct orlov_stats stats;
int flex_size = ext4_flex_bg_size(sbi); int flex_size = ext4_flex_bg_size(sbi);
struct dx_hash_info hinfo;
ngroups = real_ngroups; ngroups = real_ngroups;
if (flex_size > 1) { if (flex_size > 1) {
...@@ -507,6 +508,12 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, ...@@ -507,6 +508,12 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
int best_ndir = inodes_per_group; int best_ndir = inodes_per_group;
int ret = -1; int ret = -1;
if (qstr) {
hinfo.hash_version = DX_HASH_HALF_MD4;
hinfo.seed = sbi->s_hash_seed;
ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
grp = hinfo.hash;
} else
get_random_bytes(&grp, sizeof(grp)); get_random_bytes(&grp, sizeof(grp));
parent_group = (unsigned)grp % ngroups; parent_group = (unsigned)grp % ngroups;
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++) {
...@@ -650,7 +657,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, ...@@ -650,7 +657,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
*group = parent_group + flex_size; *group = parent_group + flex_size;
if (*group > ngroups) if (*group > ngroups)
*group = 0; *group = 0;
return find_group_orlov(sb, parent, group, mode); return find_group_orlov(sb, parent, group, mode, 0);
} }
/* /*
...@@ -791,7 +798,8 @@ static int ext4_claim_inode(struct super_block *sb, ...@@ -791,7 +798,8 @@ static int ext4_claim_inode(struct super_block *sb,
* For other inodes, search forward from the parent directory's block * For other inodes, search forward from the parent directory's block
* group to find a free inode. * group to find a free inode.
*/ */
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
const struct qstr *qstr, __u32 goal)
{ {
struct super_block *sb; struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL; struct buffer_head *inode_bitmap_bh = NULL;
...@@ -815,14 +823,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) ...@@ -815,14 +823,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
sb = dir->i_sb; sb = dir->i_sb;
ngroups = ext4_get_groups_count(sb); ngroups = ext4_get_groups_count(sb);
trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, trace_ext4_request_inode(dir, mode);
dir->i_ino, mode);
inode = new_inode(sb); inode = new_inode(sb);
if (!inode) if (!inode)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
ei = EXT4_I(inode); ei = EXT4_I(inode);
sbi = EXT4_SB(sb); sbi = EXT4_SB(sb);
if (!goal)
goal = sbi->s_inode_goal;
if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) {
group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
ret2 = 0;
goto got_group;
}
if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
ret2 = find_group_flex(sb, dir, &group); ret2 = find_group_flex(sb, dir, &group);
if (ret2 == -1) { if (ret2 == -1) {
...@@ -841,7 +858,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) ...@@ -841,7 +858,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
if (test_opt(sb, OLDALLOC)) if (test_opt(sb, OLDALLOC))
ret2 = find_group_dir(sb, dir, &group); ret2 = find_group_dir(sb, dir, &group);
else else
ret2 = find_group_orlov(sb, dir, &group, mode); ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
} else } else
ret2 = find_group_other(sb, dir, &group, mode); ret2 = find_group_other(sb, dir, &group, mode);
...@@ -851,7 +868,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) ...@@ -851,7 +868,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
if (ret2 == -1) if (ret2 == -1)
goto out; goto out;
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++, ino = 0) {
err = -EIO; err = -EIO;
gdp = ext4_get_group_desc(sb, group, &group_desc_bh); gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
...@@ -863,8 +880,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) ...@@ -863,8 +880,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
if (!inode_bitmap_bh) if (!inode_bitmap_bh)
goto fail; goto fail;
ino = 0;
repeat_in_this_group: repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *) ino = ext4_find_next_zero_bit((unsigned long *)
inode_bitmap_bh->b_data, inode_bitmap_bh->b_data,
...@@ -1047,8 +1062,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) ...@@ -1047,8 +1062,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
} }
ext4_debug("allocating inode %lu\n", inode->i_ino); ext4_debug("allocating inode %lu\n", inode->i_ino);
trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d", trace_ext4_allocate_inode(inode, dir, mode);
sb->s_id, inode->i_ino, dir->i_ino, mode);
goto really_out; goto really_out;
fail: fail:
ext4_std_error(sb, err); ext4_std_error(sb, err);
......
...@@ -37,11 +37,14 @@ ...@@ -37,11 +37,14 @@
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/bio.h> #include <linux/bio.h>
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "xattr.h" #include "xattr.h"
#include "acl.h" #include "acl.h"
#include "ext4_extents.h" #include "ext4_extents.h"
#include <trace/events/ext4.h>
#define MPAGE_DA_EXTENT_TAIL 0x01 #define MPAGE_DA_EXTENT_TAIL 0x01
static inline int ext4_begin_ordered_truncate(struct inode *inode, static inline int ext4_begin_ordered_truncate(struct inode *inode,
...@@ -90,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, ...@@ -90,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
BUFFER_TRACE(bh, "enter"); BUFFER_TRACE(bh, "enter");
jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
"data mode %lx\n", "data mode %x\n",
bh, is_metadata, inode->i_mode, bh, is_metadata, inode->i_mode,
test_opt(inode->i_sb, DATA_FLAGS)); test_opt(inode->i_sb, DATA_FLAGS));
...@@ -777,7 +780,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, ...@@ -777,7 +780,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
* the chain to point to the new allocated * the chain to point to the new allocated
* data blocks numbers * data blocks numbers
*/ */
for (i=1; i < num; i++) for (i = 1; i < num; i++)
*(branch[n].p + i) = cpu_to_le32(++current_block); *(branch[n].p + i) = cpu_to_le32(++current_block);
} }
BUFFER_TRACE(bh, "marking uptodate"); BUFFER_TRACE(bh, "marking uptodate");
...@@ -820,7 +823,8 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, ...@@ -820,7 +823,8 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
* chain to new block and return 0. * chain to new block and return 0.
*/ */
static int ext4_splice_branch(handle_t *handle, struct inode *inode, static int ext4_splice_branch(handle_t *handle, struct inode *inode,
ext4_lblk_t block, Indirect *where, int num, int blks) ext4_lblk_t block, Indirect *where, int num,
int blks)
{ {
int i; int i;
int err = 0; int err = 0;
...@@ -852,10 +856,6 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, ...@@ -852,10 +856,6 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
} }
/* We are done with atomic stuff, now do the rest of housekeeping */ /* We are done with atomic stuff, now do the rest of housekeeping */
inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
/* had we spliced it onto indirect block? */ /* had we spliced it onto indirect block? */
if (where->bh) { if (where->bh) {
/* /*
...@@ -874,8 +874,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, ...@@ -874,8 +874,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
} else { } else {
/* /*
* OK, we spliced it into the inode itself on a direct block. * OK, we spliced it into the inode itself on a direct block.
* Inode was dirtied above.
*/ */
ext4_mark_inode_dirty(handle, inode);
jbd_debug(5, "splicing direct\n"); jbd_debug(5, "splicing direct\n");
} }
return err; return err;
...@@ -1405,8 +1405,7 @@ static int walk_page_buffers(handle_t *handle, ...@@ -1405,8 +1405,7 @@ static int walk_page_buffers(handle_t *handle,
for (bh = head, block_start = 0; for (bh = head, block_start = 0;
ret == 0 && (bh != head || !block_start); ret == 0 && (bh != head || !block_start);
block_start = block_end, bh = next) block_start = block_end, bh = next) {
{
next = bh->b_this_page; next = bh->b_this_page;
block_end = block_start + blocksize; block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) { if (block_end <= from || block_start >= to) {
...@@ -1466,10 +1465,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, ...@@ -1466,10 +1465,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index; pgoff_t index;
unsigned from, to; unsigned from, to;
trace_mark(ext4_write_begin, trace_ext4_write_begin(inode, pos, len, flags);
"dev %s ino %lu pos %llu len %u flags %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, flags);
/* /*
* Reserve one block more for addition to orphan list in case * Reserve one block more for addition to orphan list in case
* we allocate blocks but write fails for some reason * we allocate blocks but write fails for some reason
...@@ -1611,10 +1607,7 @@ static int ext4_ordered_write_end(struct file *file, ...@@ -1611,10 +1607,7 @@ static int ext4_ordered_write_end(struct file *file,
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
int ret = 0, ret2; int ret = 0, ret2;
trace_mark(ext4_ordered_write_end, trace_ext4_ordered_write_end(inode, pos, len, copied);
"dev %s ino %lu pos %llu len %u copied %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, copied);
ret = ext4_jbd2_file_inode(handle, inode); ret = ext4_jbd2_file_inode(handle, inode);
if (ret == 0) { if (ret == 0) {
...@@ -1658,10 +1651,7 @@ static int ext4_writeback_write_end(struct file *file, ...@@ -1658,10 +1651,7 @@ static int ext4_writeback_write_end(struct file *file,
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
int ret = 0, ret2; int ret = 0, ret2;
trace_mark(ext4_writeback_write_end, trace_ext4_writeback_write_end(inode, pos, len, copied);
"dev %s ino %lu pos %llu len %u copied %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, copied);
ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
page, fsdata); page, fsdata);
copied = ret2; copied = ret2;
...@@ -1705,10 +1695,7 @@ static int ext4_journalled_write_end(struct file *file, ...@@ -1705,10 +1695,7 @@ static int ext4_journalled_write_end(struct file *file,
unsigned from, to; unsigned from, to;
loff_t new_i_size; loff_t new_i_size;
trace_mark(ext4_journalled_write_end, trace_ext4_journalled_write_end(inode, pos, len, copied);
"dev %s ino %lu pos %llu len %u copied %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1); from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len; to = from + len;
...@@ -2554,9 +2541,7 @@ static int ext4_da_writepage(struct page *page, ...@@ -2554,9 +2541,7 @@ static int ext4_da_writepage(struct page *page,
struct buffer_head *page_bufs; struct buffer_head *page_bufs;
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
trace_mark(ext4_da_writepage, trace_ext4_da_writepage(inode, page);
"dev %s ino %lu page_index %lu",
inode->i_sb->s_id, inode->i_ino, page->index);
size = i_size_read(inode); size = i_size_read(inode);
if (page->index == size >> PAGE_CACHE_SHIFT) if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK; len = size & ~PAGE_CACHE_MASK;
...@@ -2667,19 +2652,7 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2667,19 +2652,7 @@ static int ext4_da_writepages(struct address_space *mapping,
int needed_blocks, ret = 0, nr_to_writebump = 0; int needed_blocks, ret = 0, nr_to_writebump = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
trace_mark(ext4_da_writepages, trace_ext4_da_writepages(inode, wbc);
"dev %s ino %lu nr_t_write %ld "
"pages_skipped %ld range_start %llu "
"range_end %llu nonblocking %d "
"for_kupdate %d for_reclaim %d "
"for_writepages %d range_cyclic %d",
inode->i_sb->s_id, inode->i_ino,
wbc->nr_to_write, wbc->pages_skipped,
(unsigned long long) wbc->range_start,
(unsigned long long) wbc->range_end,
wbc->nonblocking, wbc->for_kupdate,
wbc->for_reclaim, wbc->for_writepages,
wbc->range_cyclic);
/* /*
* No pages to write? This is mainly a kludge to avoid starting * No pages to write? This is mainly a kludge to avoid starting
...@@ -2693,13 +2666,13 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2693,13 +2666,13 @@ static int ext4_da_writepages(struct address_space *mapping,
* If the filesystem has aborted, it is read-only, so return * If the filesystem has aborted, it is read-only, so return
* right away instead of dumping stack traces later on that * right away instead of dumping stack traces later on that
* will obscure the real source of the problem. We test * will obscure the real source of the problem. We test
* EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because
* the latter could be true if the filesystem is mounted * the latter could be true if the filesystem is mounted
* read-only, and in that case, ext4_da_writepages should * read-only, and in that case, ext4_da_writepages should
* *never* be called, so if that ever happens, we would want * *never* be called, so if that ever happens, we would want
* the stack trace. * the stack trace.
*/ */
if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT)) if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
return -EROFS; return -EROFS;
/* /*
...@@ -2845,14 +2818,7 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2845,14 +2818,7 @@ static int ext4_da_writepages(struct address_space *mapping,
if (!no_nrwrite_index_update) if (!no_nrwrite_index_update)
wbc->no_nrwrite_index_update = 0; wbc->no_nrwrite_index_update = 0;
wbc->nr_to_write -= nr_to_writebump; wbc->nr_to_write -= nr_to_writebump;
trace_mark(ext4_da_writepage_result, trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
"dev %s ino %lu ret %d pages_written %d "
"pages_skipped %ld congestion %d "
"more_io %d no_nrwrite_index_update %d",
inode->i_sb->s_id, inode->i_ino, ret,
pages_written, wbc->pages_skipped,
wbc->encountered_congestion, wbc->more_io,
wbc->no_nrwrite_index_update);
return ret; return ret;
} }
...@@ -2904,11 +2870,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, ...@@ -2904,11 +2870,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
len, flags, pagep, fsdata); len, flags, pagep, fsdata);
} }
*fsdata = (void *)0; *fsdata = (void *)0;
trace_ext4_da_write_begin(inode, pos, len, flags);
trace_mark(ext4_da_write_begin,
"dev %s ino %lu pos %llu len %u flags %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, flags);
retry: retry:
/* /*
* With delayed allocation, we don't log the i_disksize update * With delayed allocation, we don't log the i_disksize update
...@@ -3001,10 +2963,7 @@ static int ext4_da_write_end(struct file *file, ...@@ -3001,10 +2963,7 @@ static int ext4_da_write_end(struct file *file,
} }
} }
trace_mark(ext4_da_write_end, trace_ext4_da_write_end(inode, pos, len, copied);
"dev %s ino %lu pos %llu len %u copied %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, copied);
start = pos & (PAGE_CACHE_SIZE - 1); start = pos & (PAGE_CACHE_SIZE - 1);
end = start + copied - 1; end = start + copied - 1;
...@@ -3255,9 +3214,7 @@ static int ext4_normal_writepage(struct page *page, ...@@ -3255,9 +3214,7 @@ static int ext4_normal_writepage(struct page *page,
loff_t size = i_size_read(inode); loff_t size = i_size_read(inode);
loff_t len; loff_t len;
trace_mark(ext4_normal_writepage, trace_ext4_normal_writepage(inode, page);
"dev %s ino %lu page_index %lu",
inode->i_sb->s_id, inode->i_ino, page->index);
J_ASSERT(PageLocked(page)); J_ASSERT(PageLocked(page));
if (page->index == size >> PAGE_CACHE_SHIFT) if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK; len = size & ~PAGE_CACHE_MASK;
...@@ -3343,9 +3300,7 @@ static int ext4_journalled_writepage(struct page *page, ...@@ -3343,9 +3300,7 @@ static int ext4_journalled_writepage(struct page *page,
loff_t size = i_size_read(inode); loff_t size = i_size_read(inode);
loff_t len; loff_t len;
trace_mark(ext4_journalled_writepage, trace_ext4_journalled_writepage(inode, page);
"dev %s ino %lu page_index %lu",
inode->i_sb->s_id, inode->i_ino, page->index);
J_ASSERT(PageLocked(page)); J_ASSERT(PageLocked(page));
if (page->index == size >> PAGE_CACHE_SHIFT) if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK; len = size & ~PAGE_CACHE_MASK;
...@@ -3763,7 +3718,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q) ...@@ -3763,7 +3718,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
* (no partially truncated stuff there). */ * (no partially truncated stuff there). */
static Indirect *ext4_find_shared(struct inode *inode, int depth, static Indirect *ext4_find_shared(struct inode *inode, int depth,
ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top) ext4_lblk_t offsets[4], Indirect chain[4],
__le32 *top)
{ {
Indirect *partial, *p; Indirect *partial, *p;
int k, err; int k, err;
...@@ -3819,8 +3775,10 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, ...@@ -3819,8 +3775,10 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
* than `count' because there can be holes in there. * than `count' because there can be holes in there.
*/ */
static void ext4_clear_blocks(handle_t *handle, struct inode *inode, static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block_to_free, struct buffer_head *bh,
unsigned long count, __le32 *first, __le32 *last) ext4_fsblk_t block_to_free,
unsigned long count, __le32 *first,
__le32 *last)
{ {
__le32 *p; __le32 *p;
if (try_to_extend_transaction(handle, inode)) { if (try_to_extend_transaction(handle, inode)) {
...@@ -3837,10 +3795,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, ...@@ -3837,10 +3795,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
} }
/* /*
* Any buffers which are on the journal will be in memory. We find * Any buffers which are on the journal will be in memory. We
* them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget() * find them on the hash table so jbd2_journal_revoke() will
* on them. We've already detached each block from the file, so * run jbd2_journal_forget() on them. We've already detached
* bforget() in jbd2_journal_forget() should be safe. * each block from the file, so bforget() in
* jbd2_journal_forget() should be safe.
* *
* AKPM: turn on bforget in jbd2_journal_forget()!!! * AKPM: turn on bforget in jbd2_journal_forget()!!!
*/ */
...@@ -4212,7 +4171,7 @@ void ext4_truncate(struct inode *inode) ...@@ -4212,7 +4171,7 @@ void ext4_truncate(struct inode *inode)
(__le32*)partial->bh->b_data+addr_per_block, (__le32*)partial->bh->b_data+addr_per_block,
(chain+n-1) - partial); (chain+n-1) - partial);
BUFFER_TRACE(partial->bh, "call brelse"); BUFFER_TRACE(partial->bh, "call brelse");
brelse (partial->bh); brelse(partial->bh);
partial--; partial--;
} }
do_indirects: do_indirects:
...@@ -4453,6 +4412,7 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei) ...@@ -4453,6 +4412,7 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
if (flags & S_DIRSYNC) if (flags & S_DIRSYNC)
ei->i_flags |= EXT4_DIRSYNC_FL; ei->i_flags |= EXT4_DIRSYNC_FL;
} }
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
struct ext4_inode_info *ei) struct ext4_inode_info *ei)
{ {
...@@ -4795,7 +4755,8 @@ static int ext4_do_update_inode(handle_t *handle, ...@@ -4795,7 +4755,8 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le32(new_encode_dev(inode->i_rdev)); cpu_to_le32(new_encode_dev(inode->i_rdev));
raw_inode->i_block[2] = 0; raw_inode->i_block[2] = 0;
} }
} else for (block = 0; block < EXT4_N_BLOCKS; block++) } else
for (block = 0; block < EXT4_N_BLOCKS; block++)
raw_inode->i_block[block] = ei->i_data[block]; raw_inode->i_block[block] = ei->i_data[block];
raw_inode->i_disk_version = cpu_to_le32(inode->i_version); raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "ext4.h" #include "ext4.h"
...@@ -213,6 +214,41 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -213,6 +214,41 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return err; return err;
} }
case EXT4_IOC_MOVE_EXT: {
struct move_extent me;
struct file *donor_filp;
int err;
if (copy_from_user(&me,
(struct move_extent __user *)arg, sizeof(me)))
return -EFAULT;
donor_filp = fget(me.donor_fd);
if (!donor_filp)
return -EBADF;
if (!capable(CAP_DAC_OVERRIDE)) {
if ((current->real_cred->fsuid != inode->i_uid) ||
!(inode->i_mode & S_IRUSR) ||
!(donor_filp->f_dentry->d_inode->i_mode &
S_IRUSR)) {
fput(donor_filp);
return -EACCES;
}
}
err = ext4_move_extents(filp, donor_filp, me.orig_start,
me.donor_start, me.len, &me.moved_len);
fput(donor_filp);
if (!err)
if (copy_to_user((struct move_extent *)arg,
&me, sizeof(me)))
return -EFAULT;
return err;
}
case EXT4_IOC_GROUP_ADD: { case EXT4_IOC_GROUP_ADD: {
struct ext4_new_group_data input; struct ext4_new_group_data input;
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
......
...@@ -22,6 +22,8 @@ ...@@ -22,6 +22,8 @@
*/ */
#include "mballoc.h" #include "mballoc.h"
#include <trace/events/ext4.h>
/* /*
* MUSTDO: * MUSTDO:
* - test ext4_ext_search_left() and ext4_ext_search_right() * - test ext4_ext_search_left() and ext4_ext_search_right()
...@@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, ...@@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
ext4_group_t group); ext4_group_t group);
static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
static inline void *mb_correct_addr_and_bit(int *bit, void *addr) static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{ {
#if BITS_PER_LONG == 64 #if BITS_PER_LONG == 64
...@@ -2859,8 +2859,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) ...@@ -2859,8 +2859,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+ entry->start_blk + entry->start_blk
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
sb->s_id, (unsigned long long) discard_block,
entry->count); entry->count);
sb_issue_discard(sb, discard_block, entry->count); sb_issue_discard(sb, discard_block, entry->count);
...@@ -3629,10 +3628,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) ...@@ -3629,10 +3628,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
mb_debug("new inode pa %p: %llu/%u for %u\n", pa, mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
pa->pa_pstart, pa->pa_len, pa->pa_lstart); pa->pa_pstart, pa->pa_len, pa->pa_lstart);
trace_mark(ext4_mb_new_inode_pa, trace_ext4_mb_new_inode_pa(ac, pa);
"dev %s ino %lu pstart %llu len %u lstart %u",
sb->s_id, ac->ac_inode->i_ino,
pa->pa_pstart, pa->pa_len, pa->pa_lstart);
ext4_mb_use_inode_pa(ac, pa); ext4_mb_use_inode_pa(ac, pa);
atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
...@@ -3692,8 +3688,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) ...@@ -3692,8 +3688,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
mb_debug("new group pa %p: %llu/%u for %u\n", pa, mb_debug("new group pa %p: %llu/%u for %u\n", pa,
pa->pa_pstart, pa->pa_len, pa->pa_lstart); pa->pa_pstart, pa->pa_len, pa->pa_lstart);
trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u", trace_ext4_mb_new_group_pa(ac, pa);
sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart);
ext4_mb_use_group_pa(ac, pa); ext4_mb_use_group_pa(ac, pa);
atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
...@@ -3783,9 +3778,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, ...@@ -3783,9 +3778,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
ext4_mb_store_history(ac); ext4_mb_store_history(ac);
} }
trace_mark(ext4_mb_release_inode_pa, trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
"dev %s ino %lu block %llu count %u",
sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit,
next - bit); next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1; bit = next + 1;
...@@ -3820,8 +3813,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, ...@@ -3820,8 +3813,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
if (ac) if (ac)
ac->ac_op = EXT4_MB_HISTORY_DISCARD; ac->ac_op = EXT4_MB_HISTORY_DISCARD;
trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d", trace_ext4_mb_release_group_pa(ac, pa);
sb->s_id, pa->pa_pstart, pa->pa_len);
BUG_ON(pa->pa_deleted == 0); BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0); BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
...@@ -3889,6 +3881,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, ...@@ -3889,6 +3881,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac)
ac->ac_sb = sb;
repeat: repeat:
ext4_lock_group(sb, group); ext4_lock_group(sb, group);
list_for_each_entry_safe(pa, tmp, list_for_each_entry_safe(pa, tmp,
...@@ -3987,12 +3981,15 @@ void ext4_discard_preallocations(struct inode *inode) ...@@ -3987,12 +3981,15 @@ void ext4_discard_preallocations(struct inode *inode)
} }
mb_debug("discard preallocation for inode %lu\n", inode->i_ino); mb_debug("discard preallocation for inode %lu\n", inode->i_ino);
trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id, trace_ext4_discard_preallocations(inode);
inode->i_ino);
INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac) {
ac->ac_sb = sb;
ac->ac_inode = inode;
}
repeat: repeat:
/* first, collect all pa's in the inode */ /* first, collect all pa's in the inode */
spin_lock(&ei->i_prealloc_lock); spin_lock(&ei->i_prealloc_lock);
...@@ -4276,6 +4273,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, ...@@ -4276,6 +4273,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
INIT_LIST_HEAD(&discard_list); INIT_LIST_HEAD(&discard_list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac)
ac->ac_sb = sb;
spin_lock(&lg->lg_prealloc_lock); spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
...@@ -4445,8 +4444,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) ...@@ -4445,8 +4444,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
int ret; int ret;
int freed = 0; int freed = 0;
trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", trace_ext4_mb_discard_preallocations(sb, needed);
sb->s_id, needed);
for (i = 0; i < ngroups && needed > 0; i++) { for (i = 0; i < ngroups && needed > 0; i++) {
ret = ext4_mb_discard_group_preallocations(sb, i, needed); ret = ext4_mb_discard_group_preallocations(sb, i, needed);
freed += ret; freed += ret;
...@@ -4475,17 +4473,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ...@@ -4475,17 +4473,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
sb = ar->inode->i_sb; sb = ar->inode->i_sb;
sbi = EXT4_SB(sb); sbi = EXT4_SB(sb);
trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu " trace_ext4_request_blocks(ar);
"lblk %llu goal %llu lleft %llu lright %llu "
"pleft %llu pright %llu ",
sb->s_id, ar->flags, ar->len,
ar->inode ? ar->inode->i_ino : 0,
(unsigned long long) ar->logical,
(unsigned long long) ar->goal,
(unsigned long long) ar->lleft,
(unsigned long long) ar->lright,
(unsigned long long) ar->pleft,
(unsigned long long) ar->pright);
/* /*
* For delayed allocation, we could skip the ENOSPC and * For delayed allocation, we could skip the ENOSPC and
...@@ -4521,7 +4509,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ...@@ -4521,7 +4509,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
} }
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (!ac) { if (ac) {
ac->ac_sb = sb;
ac->ac_inode = ar->inode;
} else {
ar->len = 0; ar->len = 0;
*errp = -ENOMEM; *errp = -ENOMEM;
goto out1; goto out1;
...@@ -4594,18 +4585,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ...@@ -4594,18 +4585,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
reserv_blks); reserv_blks);
} }
trace_mark(ext4_allocate_blocks, trace_ext4_allocate_blocks(ar, (unsigned long long)block);
"dev %s block %llu flags %u len %u ino %lu "
"logical %llu goal %llu lleft %llu lright %llu "
"pleft %llu pright %llu ",
sb->s_id, (unsigned long long) block,
ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0,
(unsigned long long) ar->logical,
(unsigned long long) ar->goal,
(unsigned long long) ar->lleft,
(unsigned long long) ar->lright,
(unsigned long long) ar->pleft,
(unsigned long long) ar->pright);
return block; return block;
} }
...@@ -4709,7 +4689,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ...@@ -4709,7 +4689,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* Main entry point into mballoc to free blocks * Main entry point into mballoc to free blocks
*/ */
void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
unsigned long block, unsigned long count, ext4_fsblk_t block, unsigned long count,
int metadata, unsigned long *freed) int metadata, unsigned long *freed)
{ {
struct buffer_head *bitmap_bh = NULL; struct buffer_head *bitmap_bh = NULL;
...@@ -4735,15 +4715,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, ...@@ -4735,15 +4715,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
block + count > ext4_blocks_count(es)) { block + count > ext4_blocks_count(es)) {
ext4_error(sb, __func__, ext4_error(sb, __func__,
"Freeing blocks not in datazone - " "Freeing blocks not in datazone - "
"block = %lu, count = %lu", block, count); "block = %llu, count = %lu", block, count);
goto error_return; goto error_return;
} }
ext4_debug("freeing block %lu\n", block); ext4_debug("freeing block %llu\n", block);
trace_mark(ext4_free_blocks, trace_ext4_free_blocks(inode, block, count, metadata);
"dev %s block %llu count %lu metadata %d ino %lu",
sb->s_id, (unsigned long long) block, count, metadata,
inode ? inode->i_ino : 0);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac) { if (ac) {
...@@ -4784,7 +4761,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, ...@@ -4784,7 +4761,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
ext4_error(sb, __func__, ext4_error(sb, __func__,
"Freeing blocks in system zone - " "Freeing blocks in system zone - "
"Block = %lu, count = %lu", block, count); "Block = %llu, count = %lu", block, count);
/* err = 0. ext4_std_error should be a no op */ /* err = 0. ext4_std_error should be a no op */
goto error_return; goto error_return;
} }
......
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/version.h> #include <linux/version.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/marker.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "ext4.h" #include "ext4.h"
......
...@@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode) ...@@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode)
struct inode *tmp_inode = NULL; struct inode *tmp_inode = NULL;
struct list_blocks_struct lb; struct list_blocks_struct lb;
unsigned long max_entries; unsigned long max_entries;
__u32 goal;
/* /*
* If the filesystem does not support extents, or the inode * If the filesystem does not support extents, or the inode
...@@ -483,9 +484,10 @@ int ext4_ext_migrate(struct inode *inode) ...@@ -483,9 +484,10 @@ int ext4_ext_migrate(struct inode *inode)
retval = PTR_ERR(handle); retval = PTR_ERR(handle);
return retval; return retval;
} }
tmp_inode = ext4_new_inode(handle, goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
inode->i_sb->s_root->d_inode, EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
S_IFREG); tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
S_IFREG, 0, goal);
if (IS_ERR(tmp_inode)) { if (IS_ERR(tmp_inode)) {
retval = -ENOMEM; retval = -ENOMEM;
ext4_journal_stop(handle); ext4_journal_stop(handle);
......
/*
* Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
* Written by Takashi Sato <t-sato@yk.jp.nec.com>
* Akira Fujita <a-fujita@rs.jp.nec.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/fs.h>
#include <linux/quotaops.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "ext4.h"
#define get_ext_path(path, inode, block, ret) \
do { \
path = ext4_ext_find_extent(inode, block, path); \
if (IS_ERR(path)) { \
ret = PTR_ERR(path); \
path = NULL; \
} \
} while (0)
/**
* copy_extent_status - Copy the extent's initialization status
*
* @src: an extent for getting initialize status
* @dest: an extent to be set the status
*/
static void
copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
{
if (ext4_ext_is_uninitialized(src))
ext4_ext_mark_uninitialized(dest);
else
dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
}
/**
* mext_next_extent - Search for the next extent and set it to "extent"
*
* @inode: inode which is searched
* @path: this will obtain data for the next extent
* @extent: pointer to the next extent we have just gotten
*
* Search the next extent in the array of ext4_ext_path structure (@path)
* and set it to ext4_extent structure (@extent). In addition, the member of
* @path (->p_ext) also points the next extent. Return 0 on success, 1 if
* ext4_ext_path structure refers to the last extent, or a negative error
* value on failure.
*/
static int
mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
struct ext4_extent **extent)
{
int ppos, leaf_ppos = path->p_depth;
ppos = leaf_ppos;
if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
/* leaf block */
*extent = ++path[ppos].p_ext;
return 0;
}
while (--ppos >= 0) {
if (EXT_LAST_INDEX(path[ppos].p_hdr) >
path[ppos].p_idx) {
int cur_ppos = ppos;
/* index block */
path[ppos].p_idx++;
path[ppos].p_block = idx_pblock(path[ppos].p_idx);
if (path[ppos+1].p_bh)
brelse(path[ppos+1].p_bh);
path[ppos+1].p_bh =
sb_bread(inode->i_sb, path[ppos].p_block);
if (!path[ppos+1].p_bh)
return -EIO;
path[ppos+1].p_hdr =
ext_block_hdr(path[ppos+1].p_bh);
/* Halfway index block */
while (++cur_ppos < leaf_ppos) {
path[cur_ppos].p_idx =
EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
path[cur_ppos].p_block =
idx_pblock(path[cur_ppos].p_idx);
if (path[cur_ppos+1].p_bh)
brelse(path[cur_ppos+1].p_bh);
path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
path[cur_ppos].p_block);
if (!path[cur_ppos+1].p_bh)
return -EIO;
path[cur_ppos+1].p_hdr =
ext_block_hdr(path[cur_ppos+1].p_bh);
}
/* leaf block */
path[leaf_ppos].p_ext = *extent =
EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
return 0;
}
}
/* We found the last extent */
return 1;
}
/**
* mext_double_down_read - Acquire two inodes' read semaphore
*
* @orig_inode: original inode structure
* @donor_inode: donor inode structure
* Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
*/
static void
mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
{
struct inode *first = orig_inode, *second = donor_inode;
BUG_ON(orig_inode == NULL || donor_inode == NULL);
/*
* Use the inode number to provide the stable locking order instead
* of its address, because the C language doesn't guarantee you can
* compare pointers that don't come from the same array.
*/
if (donor_inode->i_ino < orig_inode->i_ino) {
first = donor_inode;
second = orig_inode;
}
down_read(&EXT4_I(first)->i_data_sem);
down_read(&EXT4_I(second)->i_data_sem);
}
/**
* mext_double_down_write - Acquire two inodes' write semaphore
*
* @orig_inode: original inode structure
* @donor_inode: donor inode structure
* Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
*/
static void
mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
{
struct inode *first = orig_inode, *second = donor_inode;
BUG_ON(orig_inode == NULL || donor_inode == NULL);
/*
* Use the inode number to provide the stable locking order instead
* of its address, because the C language doesn't guarantee you can
* compare pointers that don't come from the same array.
*/
if (donor_inode->i_ino < orig_inode->i_ino) {
first = donor_inode;
second = orig_inode;
}
down_write(&EXT4_I(first)->i_data_sem);
down_write(&EXT4_I(second)->i_data_sem);
}
/**
* mext_double_up_read - Release two inodes' read semaphore
*
* @orig_inode: original inode structure to be released its lock first
* @donor_inode: donor inode structure to be released its lock second
* Release read semaphore of two inodes (orig and donor).
*/
static void
mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
{
BUG_ON(orig_inode == NULL || donor_inode == NULL);
up_read(&EXT4_I(orig_inode)->i_data_sem);
up_read(&EXT4_I(donor_inode)->i_data_sem);
}
/**
* mext_double_up_write - Release two inodes' write semaphore
*
* @orig_inode: original inode structure to be released its lock first
* @donor_inode: donor inode structure to be released its lock second
* Release write semaphore of two inodes (orig and donor).
*/
static void
mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
{
BUG_ON(orig_inode == NULL || donor_inode == NULL);
up_write(&EXT4_I(orig_inode)->i_data_sem);
up_write(&EXT4_I(donor_inode)->i_data_sem);
}
/**
* mext_insert_across_blocks - Insert extents across leaf block
*
* @handle: journal handle
* @orig_inode: original inode
* @o_start: first original extent to be changed
* @o_end: last original extent to be changed
* @start_ext: first new extent to be inserted
* @new_ext: middle of new extent to be inserted
* @end_ext: last new extent to be inserted
*
* Allocate a new leaf block and insert extents into it. Return 0 on success,
* or a negative error value on failure.
*/
static int
mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
struct ext4_extent *o_start, struct ext4_extent *o_end,
struct ext4_extent *start_ext, struct ext4_extent *new_ext,
struct ext4_extent *end_ext)
{
struct ext4_ext_path *orig_path = NULL;
ext4_lblk_t eblock = 0;
int new_flag = 0;
int end_flag = 0;
int err = 0;
if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
if (o_start == o_end) {
/* start_ext new_ext end_ext
* donor |---------|-----------|--------|
* orig |------------------------------|
*/
end_flag = 1;
} else {
/* start_ext new_ext end_ext
* donor |---------|----------|---------|
* orig |---------------|--------------|
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
}
o_start->ee_len = start_ext->ee_len;
new_flag = 1;
} else if (start_ext->ee_len && new_ext->ee_len &&
!end_ext->ee_len && o_start == o_end) {
/* start_ext new_ext
* donor |--------------|---------------|
* orig |------------------------------|
*/
o_start->ee_len = start_ext->ee_len;
new_flag = 1;
} else if (!start_ext->ee_len && new_ext->ee_len &&
end_ext->ee_len && o_start == o_end) {
/* new_ext end_ext
* donor |--------------|---------------|
* orig |------------------------------|
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
/*
* Set 0 to the extent block if new_ext was
* the first block.
*/
if (new_ext->ee_block)
eblock = le32_to_cpu(new_ext->ee_block);
new_flag = 1;
} else {
ext4_debug("ext4 move extent: Unexpected insert case\n");
return -EIO;
}
if (new_flag) {
get_ext_path(orig_path, orig_inode, eblock, err);
if (orig_path == NULL)
goto out;
if (ext4_ext_insert_extent(handle, orig_inode,
orig_path, new_ext))
goto out;
}
if (end_flag) {
get_ext_path(orig_path, orig_inode,
le32_to_cpu(end_ext->ee_block) - 1, err);
if (orig_path == NULL)
goto out;
if (ext4_ext_insert_extent(handle, orig_inode,
orig_path, end_ext))
goto out;
}
out:
if (orig_path) {
ext4_ext_drop_refs(orig_path);
kfree(orig_path);
}
return err;
}
/**
* mext_insert_inside_block - Insert new extent to the extent block
*
* @o_start: first original extent to be moved
* @o_end: last original extent to be moved
* @start_ext: first new extent to be inserted
* @new_ext: middle of new extent to be inserted
* @end_ext: last new extent to be inserted
* @eh: extent header of target leaf block
* @range_to_move: used to decide how to insert extent
*
* Insert extents into the leaf block. The extent (@o_start) is overwritten
* by inserted extents.
*/
static void
mext_insert_inside_block(struct ext4_extent *o_start,
struct ext4_extent *o_end,
struct ext4_extent *start_ext,
struct ext4_extent *new_ext,
struct ext4_extent *end_ext,
struct ext4_extent_header *eh,
int range_to_move)
{
int i = 0;
unsigned long len;
/* Move the existing extents */
if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
(unsigned long)(o_end + 1);
memmove(o_end + 1 + range_to_move, o_end + 1, len);
}
/* Insert start entry */
if (start_ext->ee_len)
o_start[i++].ee_len = start_ext->ee_len;
/* Insert new entry */
if (new_ext->ee_len) {
o_start[i] = *new_ext;
ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
}
/* Insert end entry */
if (end_ext->ee_len)
o_start[i] = *end_ext;
/* Increment the total entries counter on the extent block */
le16_add_cpu(&eh->eh_entries, range_to_move);
}
/**
* mext_insert_extents - Insert new extent
*
* @handle: journal handle
* @orig_inode: original inode
* @orig_path: path indicates first extent to be changed
* @o_start: first original extent to be changed
* @o_end: last original extent to be changed
* @start_ext: first new extent to be inserted
* @new_ext: middle of new extent to be inserted
* @end_ext: last new extent to be inserted
*
* Call the function to insert extents. If we cannot add more extents into
* the leaf block, we call mext_insert_across_blocks() to create a
* new leaf block. Otherwise call mext_insert_inside_block(). Return 0
* on success, or a negative error value on failure.
*/
static int
mext_insert_extents(handle_t *handle, struct inode *orig_inode,
struct ext4_ext_path *orig_path,
struct ext4_extent *o_start,
struct ext4_extent *o_end,
struct ext4_extent *start_ext,
struct ext4_extent *new_ext,
struct ext4_extent *end_ext)
{
struct ext4_extent_header *eh;
unsigned long need_slots, slots_range;
int range_to_move, depth, ret;
/*
* The extents need to be inserted
* start_extent + new_extent + end_extent.
*/
need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
(new_ext->ee_len ? 1 : 0);
/* The number of slots between start and end */
slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
/ sizeof(struct ext4_extent);
/* Range to move the end of extent */
range_to_move = need_slots - slots_range;
depth = orig_path->p_depth;
orig_path += depth;
eh = orig_path->p_hdr;
if (depth) {
/* Register to journal */
ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
if (ret)
return ret;
}
/* Expansion */
if (range_to_move > 0 &&
(range_to_move > le16_to_cpu(eh->eh_max)
- le16_to_cpu(eh->eh_entries))) {
ret = mext_insert_across_blocks(handle, orig_inode, o_start,
o_end, start_ext, new_ext, end_ext);
if (ret < 0)
return ret;
} else
mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
end_ext, eh, range_to_move);
if (depth) {
ret = ext4_handle_dirty_metadata(handle, orig_inode,
orig_path->p_bh);
if (ret)
return ret;
} else {
ret = ext4_mark_inode_dirty(handle, orig_inode);
if (ret < 0)
return ret;
}
return 0;
}
/**
* mext_leaf_block - Move one leaf extent block into the inode.
*
* @handle: journal handle
* @orig_inode: original inode
* @orig_path: path indicates first extent to be changed
* @dext: donor extent
* @from: start offset on the target file
*
* In order to insert extents into the leaf block, we must divide the extent
* in the leaf block into three extents. The one is located to be inserted
* extents, and the others are located around it.
*
* Therefore, this function creates structures to save extents of the leaf
* block, and inserts extents by calling mext_insert_extents() with
* created extents. Return 0 on success, or a negative error value on failure.
*/
static int
mext_leaf_block(handle_t *handle, struct inode *orig_inode,
struct ext4_ext_path *orig_path, struct ext4_extent *dext,
ext4_lblk_t *from)
{
struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
struct ext4_extent new_ext, start_ext, end_ext;
ext4_lblk_t new_ext_end;
ext4_fsblk_t new_phys_end;
int oext_alen, new_ext_alen, end_ext_alen;
int depth = ext_depth(orig_inode);
int ret;
o_start = o_end = oext = orig_path[depth].p_ext;
oext_alen = ext4_ext_get_actual_len(oext);
start_ext.ee_len = end_ext.ee_len = 0;
new_ext.ee_block = cpu_to_le32(*from);
ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
new_ext.ee_len = dext->ee_len;
new_ext_alen = ext4_ext_get_actual_len(&new_ext);
new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
/*
* Case: original extent is first
* oext |--------|
* new_ext |--|
* start_ext |--|
*/
if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
le32_to_cpu(new_ext.ee_block) <
le32_to_cpu(oext->ee_block) + oext_alen) {
start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
le32_to_cpu(oext->ee_block));
copy_extent_status(oext, &start_ext);
} else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
prev_ext = oext - 1;
/*
* We can merge new_ext into previous extent,
* if these are contiguous and same extent type.
*/
if (ext4_can_extents_be_merged(orig_inode, prev_ext,
&new_ext)) {
o_start = prev_ext;
start_ext.ee_len = cpu_to_le16(
ext4_ext_get_actual_len(prev_ext) +
new_ext_alen);
copy_extent_status(prev_ext, &start_ext);
new_ext.ee_len = 0;
}
}
/*
* Case: new_ext_end must be less than oext
* oext |-----------|
* new_ext |-------|
*/
BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end);
/*
* Case: new_ext is smaller than original extent
* oext |---------------|
* new_ext |-----------|
* end_ext |---|
*/
if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
end_ext.ee_len =
cpu_to_le16(le32_to_cpu(oext->ee_block) +
oext_alen - 1 - new_ext_end);
copy_extent_status(oext, &end_ext);
end_ext_alen = ext4_ext_get_actual_len(&end_ext);
ext4_ext_store_pblock(&end_ext,
(ext_pblock(o_end) + oext_alen - end_ext_alen));
end_ext.ee_block =
cpu_to_le32(le32_to_cpu(o_end->ee_block) +
oext_alen - end_ext_alen);
}
ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
o_end, &start_ext, &new_ext, &end_ext);
return ret;
}
/**
* mext_calc_swap_extents - Calculate extents for extent swapping.
*
* @tmp_dext: the extent that will belong to the original inode
* @tmp_oext: the extent that will belong to the donor inode
* @orig_off: block offset of original inode
* @donor_off: block offset of donor inode
* @max_count: the maximun length of extents
*/
static void
mext_calc_swap_extents(struct ext4_extent *tmp_dext,
struct ext4_extent *tmp_oext,
ext4_lblk_t orig_off, ext4_lblk_t donor_off,
ext4_lblk_t max_count)
{
ext4_lblk_t diff, orig_diff;
struct ext4_extent dext_old, oext_old;
dext_old = *tmp_dext;
oext_old = *tmp_oext;
/* When tmp_dext is too large, pick up the target range. */
diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
tmp_dext->ee_block =
cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
if (max_count < ext4_ext_get_actual_len(tmp_dext))
tmp_dext->ee_len = cpu_to_le16(max_count);
orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
/* Adjust extent length if donor extent is larger than orig */
if (ext4_ext_get_actual_len(tmp_dext) >
ext4_ext_get_actual_len(tmp_oext) - orig_diff)
tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
orig_diff);
tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
copy_extent_status(&oext_old, tmp_dext);
copy_extent_status(&dext_old, tmp_oext);
}
/**
* mext_replace_branches - Replace original extents with new extents
*
* @handle: journal handle
* @orig_inode: original inode
* @donor_inode: donor inode
* @from: block offset of orig_inode
* @count: block count to be replaced
*
* Replace original inode extents and donor inode extents page by page.
* We implement this replacement in the following three steps:
* 1. Save the block information of original and donor inodes into
* dummy extents.
* 2. Change the block information of original inode to point at the
* donor inode blocks.
* 3. Change the block information of donor inode to point at the saved
* original inode blocks in the dummy extents.
*
* Return 0 on success, or a negative error value on failure.
*/
static int
mext_replace_branches(handle_t *handle, struct inode *orig_inode,
struct inode *donor_inode, ext4_lblk_t from,
ext4_lblk_t count)
{
struct ext4_ext_path *orig_path = NULL;
struct ext4_ext_path *donor_path = NULL;
struct ext4_extent *oext, *dext;
struct ext4_extent tmp_dext, tmp_oext;
ext4_lblk_t orig_off = from, donor_off = from;
int err = 0;
int depth;
int replaced_count = 0;
int dext_alen;
mext_double_down_write(orig_inode, donor_inode);
/* Get the original extent for the block "orig_off" */
get_ext_path(orig_path, orig_inode, orig_off, err);
if (orig_path == NULL)
goto out;
/* Get the donor extent for the head */
get_ext_path(donor_path, donor_inode, donor_off, err);
if (donor_path == NULL)
goto out;
depth = ext_depth(orig_inode);
oext = orig_path[depth].p_ext;
tmp_oext = *oext;
depth = ext_depth(donor_inode);
dext = donor_path[depth].p_ext;
tmp_dext = *dext;
mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
donor_off, count);
/* Loop for the donor extents */
while (1) {
/* The extent for donor must be found. */
BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block));
/* Set donor extent to orig extent */
err = mext_leaf_block(handle, orig_inode,
orig_path, &tmp_dext, &orig_off);
if (err < 0)
goto out;
/* Set orig extent to donor extent */
err = mext_leaf_block(handle, donor_inode,
donor_path, &tmp_oext, &donor_off);
if (err < 0)
goto out;
dext_alen = ext4_ext_get_actual_len(&tmp_dext);
replaced_count += dext_alen;
donor_off += dext_alen;
orig_off += dext_alen;
/* Already moved the expected blocks */
if (replaced_count >= count)
break;
if (orig_path)
ext4_ext_drop_refs(orig_path);
get_ext_path(orig_path, orig_inode, orig_off, err);
if (orig_path == NULL)
goto out;
depth = ext_depth(orig_inode);
oext = orig_path[depth].p_ext;
if (le32_to_cpu(oext->ee_block) +
ext4_ext_get_actual_len(oext) <= orig_off) {
err = 0;
goto out;
}
tmp_oext = *oext;
if (donor_path)
ext4_ext_drop_refs(donor_path);
get_ext_path(donor_path, donor_inode,
donor_off, err);
if (donor_path == NULL)
goto out;
depth = ext_depth(donor_inode);
dext = donor_path[depth].p_ext;
if (le32_to_cpu(dext->ee_block) +
ext4_ext_get_actual_len(dext) <= donor_off) {
err = 0;
goto out;
}
tmp_dext = *dext;
mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
donor_off,
count - replaced_count);
}
out:
if (orig_path) {
ext4_ext_drop_refs(orig_path);
kfree(orig_path);
}
if (donor_path) {
ext4_ext_drop_refs(donor_path);
kfree(donor_path);
}
mext_double_up_write(orig_inode, donor_inode);
return err;
}
/**
* move_extent_per_page - Move extent data per page
*
* @o_filp: file structure of original file
* @donor_inode: donor inode
* @orig_page_offset: page index on original file
* @data_offset_in_page: block index where data swapping starts
* @block_len_in_page: the number of blocks to be swapped
* @uninit: orig extent is uninitialized or not
*
* Save the data in original inode blocks and replace original inode extents
* with donor inode extents by calling mext_replace_branches().
* Finally, write out the saved data in new original inode blocks. Return 0
* on success, or a negative error value on failure.
*/
static int
move_extent_par_page(struct file *o_filp, struct inode *donor_inode,
pgoff_t orig_page_offset, int data_offset_in_page,
int block_len_in_page, int uninit)
{
struct inode *orig_inode = o_filp->f_dentry->d_inode;
struct address_space *mapping = orig_inode->i_mapping;
struct buffer_head *bh;
struct page *page = NULL;
const struct address_space_operations *a_ops = mapping->a_ops;
handle_t *handle;
ext4_lblk_t orig_blk_offset;
long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
unsigned int w_flags = 0;
unsigned int tmp_data_len, data_len;
void *fsdata;
int ret, i, jblocks;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
/*
* It needs twice the amount of ordinary journal buffers because
* inode and donor_inode may change each different metadata blocks.
*/
jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
handle = ext4_journal_start(orig_inode, jblocks);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
return ret;
}
if (segment_eq(get_fs(), KERNEL_DS))
w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
orig_blk_offset = orig_page_offset * blocks_per_page +
data_offset_in_page;
/*
* If orig extent is uninitialized one,
* it's not necessary force the page into memory
* and then force it to be written out again.
* Just swap data blocks between orig and donor.
*/
if (uninit) {
ret = mext_replace_branches(handle, orig_inode,
donor_inode, orig_blk_offset,
block_len_in_page);
/* Clear the inode cache not to refer to the old data */
ext4_ext_invalidate_cache(orig_inode);
ext4_ext_invalidate_cache(donor_inode);
goto out2;
}
offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
/* Calculate data_len */
if ((orig_blk_offset + block_len_in_page - 1) ==
((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
/* Replace the last block */
tmp_data_len = orig_inode->i_size & (blocksize - 1);
/*
* If data_len equal zero, it shows data_len is multiples of
* blocksize. So we set appropriate value.
*/
if (tmp_data_len == 0)
tmp_data_len = blocksize;
data_len = tmp_data_len +
((block_len_in_page - 1) << orig_inode->i_blkbits);
} else {
data_len = block_len_in_page << orig_inode->i_blkbits;
}
ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
&page, &fsdata);
if (unlikely(ret < 0))
goto out;
if (!PageUptodate(page)) {
mapping->a_ops->readpage(o_filp, page);
lock_page(page);
}
/*
* try_to_release_page() doesn't call releasepage in writeback mode.
* We should care about the order of writing to the same file
* by multiple move extent processes.
* It needs to call wait_on_page_writeback() to wait for the
* writeback of the page.
*/
if (PageWriteback(page))
wait_on_page_writeback(page);
/* Release old bh and drop refs */
try_to_release_page(page, 0);
ret = mext_replace_branches(handle, orig_inode, donor_inode,
orig_blk_offset, block_len_in_page);
if (ret < 0)
goto out;
/* Clear the inode cache not to refer to the old data */
ext4_ext_invalidate_cache(orig_inode);
ext4_ext_invalidate_cache(donor_inode);
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
bh = page_buffers(page);
for (i = 0; i < data_offset_in_page; i++)
bh = bh->b_this_page;
for (i = 0; i < block_len_in_page; i++) {
ret = ext4_get_block(orig_inode,
(sector_t)(orig_blk_offset + i), bh, 0);
if (ret < 0)
goto out;
if (bh->b_this_page != NULL)
bh = bh->b_this_page;
}
ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
page, fsdata);
page = NULL;
out:
if (unlikely(page)) {
if (PageLocked(page))
unlock_page(page);
page_cache_release(page);
}
out2:
ext4_journal_stop(handle);
return ret < 0 ? ret : 0;
}
/**
* mext_check_argumants - Check whether move extent can be done
*
* @orig_inode: original inode
* @donor_inode: donor inode
* @orig_start: logical start offset in block for orig
* @donor_start: logical start offset in block for donor
* @len: the number of blocks to be moved
* @moved_len: moved block length
*
* Check the arguments of ext4_move_extents() whether the files can be
* exchanged with each other.
* Return 0 on success, or a negative error value on failure.
*/
static int
mext_check_arguments(struct inode *orig_inode,
struct inode *donor_inode, __u64 orig_start,
__u64 donor_start, __u64 *len, __u64 moved_len)
{
/* Regular file check */
if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
ext4_debug("ext4 move extent: The argument files should be "
"regular file [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* Ext4 move extent does not support swapfile */
if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
ext4_debug("ext4 move extent: The argument files should "
"not be swapfile [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* Files should be in the same ext4 FS */
if (orig_inode->i_sb != donor_inode->i_sb) {
ext4_debug("ext4 move extent: The argument files "
"should be in same FS [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* orig and donor should be different file */
if (orig_inode->i_ino == donor_inode->i_ino) {
ext4_debug("ext4 move extent: The argument files should not "
"be same file [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* Ext4 move extent supports only extent based file */
if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) {
ext4_debug("ext4 move extent: orig file is not extents "
"based file [ino:orig %lu]\n", orig_inode->i_ino);
return -EOPNOTSUPP;
} else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) {
ext4_debug("ext4 move extent: donor file is not extents "
"based file [ino:donor %lu]\n", donor_inode->i_ino);
return -EOPNOTSUPP;
}
if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
ext4_debug("ext4 move extent: File size is 0 byte\n");
return -EINVAL;
}
/* Start offset should be same */
if (orig_start != donor_start) {
ext4_debug("ext4 move extent: orig and donor's start "
"offset are not same [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
if (moved_len) {
ext4_debug("ext4 move extent: moved_len should be 0 "
"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
donor_inode->i_ino);
return -EINVAL;
}
if ((orig_start > MAX_DEFRAG_SIZE) ||
(donor_start > MAX_DEFRAG_SIZE) ||
(*len > MAX_DEFRAG_SIZE) ||
(orig_start + *len > MAX_DEFRAG_SIZE)) {
ext4_debug("ext4 move extent: Can't handle over [%lu] blocks "
"[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
if (orig_inode->i_size > donor_inode->i_size) {
if (orig_start >= donor_inode->i_size) {
ext4_debug("ext4 move extent: orig start offset "
"[%llu] should be less than donor file size "
"[%lld] [ino:orig %lu, donor_inode %lu]\n",
orig_start, donor_inode->i_size,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
if (orig_start + *len > donor_inode->i_size) {
ext4_debug("ext4 move extent: End offset [%llu] should "
"be less than donor file size [%lld]."
"So adjust length from %llu to %lld "
"[ino:orig %lu, donor %lu]\n",
orig_start + *len, donor_inode->i_size,
*len, donor_inode->i_size - orig_start,
orig_inode->i_ino, donor_inode->i_ino);
*len = donor_inode->i_size - orig_start;
}
} else {
if (orig_start >= orig_inode->i_size) {
ext4_debug("ext4 move extent: start offset [%llu] "
"should be less than original file size "
"[%lld] [inode:orig %lu, donor %lu]\n",
orig_start, orig_inode->i_size,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
if (orig_start + *len > orig_inode->i_size) {
ext4_debug("ext4 move extent: Adjust length "
"from %llu to %lld. Because it should be "
"less than original file size "
"[ino:orig %lu, donor %lu]\n",
*len, orig_inode->i_size - orig_start,
orig_inode->i_ino, donor_inode->i_ino);
*len = orig_inode->i_size - orig_start;
}
}
if (!*len) {
ext4_debug("ext4 move extent: len shoudld not be 0 "
"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
donor_inode->i_ino);
return -EINVAL;
}
return 0;
}
/**
* mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
*
* @inode1: the inode structure
* @inode2: the inode structure
*
* Lock two inodes' i_mutex by i_ino order. This function is moved from
* fs/inode.c.
*/
static void
mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
{
if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
if (inode1)
mutex_lock(&inode1->i_mutex);
else if (inode2)
mutex_lock(&inode2->i_mutex);
return;
}
if (inode1->i_ino < inode2->i_ino) {
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
} else {
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
}
}
/**
* mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
*
* @inode1: the inode that is released first
* @inode2: the inode that is released second
*
* This function is moved from fs/inode.c.
*/
static void
mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
{
if (inode1)
mutex_unlock(&inode1->i_mutex);
if (inode2 && inode2 != inode1)
mutex_unlock(&inode2->i_mutex);
}
/**
* ext4_move_extents - Exchange the specified range of a file
*
* @o_filp: file structure of the original file
* @d_filp: file structure of the donor file
* @orig_start: start offset in block for orig
* @donor_start: start offset in block for donor
* @len: the number of blocks to be moved
* @moved_len: moved block length
*
* This function returns 0 and moved block length is set in moved_len
* if succeed, otherwise returns error value.
*
* Note: ext4_move_extents() proceeds the following order.
* 1:ext4_move_extents() calculates the last block number of moving extent
* function by the start block number (orig_start) and the number of blocks
* to be moved (len) specified as arguments.
* If the {orig, donor}_start points a hole, the extent's start offset
* pointed by ext_cur (current extent), holecheck_path, orig_path are set
* after hole behind.
* 2:Continue step 3 to step 5, until the holecheck_path points to last_extent
* or the ext_cur exceeds the block_end which is last logical block number.
* 3:To get the length of continues area, call mext_next_extent()
* specified with the ext_cur (initial value is holecheck_path) re-cursive,
* until find un-continuous extent, the start logical block number exceeds
* the block_end or the extent points to the last extent.
* 4:Exchange the original inode data with donor inode data
* from orig_page_offset to seq_end_page.
* The start indexes of data are specified as arguments.
* That of the original inode is orig_page_offset,
* and the donor inode is also orig_page_offset
* (To easily handle blocksize != pagesize case, the offset for the
* donor inode is block unit).
* 5:Update holecheck_path and orig_path to points a next proceeding extent,
* then returns to step 2.
* 6:Release holecheck_path, orig_path and set the len to moved_len
* which shows the number of moved blocks.
* The moved_len is useful for the command to calculate the file offset
* for starting next move extent ioctl.
* 7:Return 0 on success, or a negative error value on failure.
*/
int
ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 orig_start, __u64 donor_start, __u64 len,
__u64 *moved_len)
{
struct inode *orig_inode = o_filp->f_dentry->d_inode;
struct inode *donor_inode = d_filp->f_dentry->d_inode;
struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
ext4_lblk_t block_start = orig_start;
ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
ext4_lblk_t rest_blocks;
pgoff_t orig_page_offset = 0, seq_end_page;
int ret, depth, last_extent = 0;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
int data_offset_in_page;
int block_len_in_page;
int uninit;
/* protect orig and donor against a truncate */
mext_inode_double_lock(orig_inode, donor_inode);
mext_double_down_read(orig_inode, donor_inode);
/* Check the filesystem environment whether move_extent can be done */
ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
donor_start, &len, *moved_len);
mext_double_up_read(orig_inode, donor_inode);
if (ret)
goto out2;
file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
block_end = block_start + len - 1;
if (file_end < block_end)
len -= block_end - file_end;
get_ext_path(orig_path, orig_inode, block_start, ret);
if (orig_path == NULL)
goto out2;
/* Get path structure to check the hole */
get_ext_path(holecheck_path, orig_inode, block_start, ret);
if (holecheck_path == NULL)
goto out;
depth = ext_depth(orig_inode);
ext_cur = holecheck_path[depth].p_ext;
if (ext_cur == NULL) {
ret = -EINVAL;
goto out;
}
/*
* Get proper extent whose ee_block is beyond block_start
* if block_start was within the hole.
*/
if (le32_to_cpu(ext_cur->ee_block) +
ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
last_extent = mext_next_extent(orig_inode,
holecheck_path, &ext_cur);
if (last_extent < 0) {
ret = last_extent;
goto out;
}
last_extent = mext_next_extent(orig_inode, orig_path,
&ext_dummy);
if (last_extent < 0) {
ret = last_extent;
goto out;
}
}
seq_start = block_start;
/* No blocks within the specified range. */
if (le32_to_cpu(ext_cur->ee_block) > block_end) {
ext4_debug("ext4 move extent: The specified range of file "
"may be the hole\n");
ret = -EINVAL;
goto out;
}
/* Adjust start blocks */
add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
ext4_ext_get_actual_len(ext_cur), block_end + 1) -
max(le32_to_cpu(ext_cur->ee_block), block_start);
while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
seq_blocks += add_blocks;
/* Adjust tail blocks */
if (seq_start + seq_blocks - 1 > block_end)
seq_blocks = block_end - seq_start + 1;
ext_prev = ext_cur;
last_extent = mext_next_extent(orig_inode, holecheck_path,
&ext_cur);
if (last_extent < 0) {
ret = last_extent;
break;
}
add_blocks = ext4_ext_get_actual_len(ext_cur);
/*
* Extend the length of contiguous block (seq_blocks)
* if extents are contiguous.
*/
if (ext4_can_extents_be_merged(orig_inode,
ext_prev, ext_cur) &&
block_end >= le32_to_cpu(ext_cur->ee_block) &&
!last_extent)
continue;
/* Is original extent is uninitialized */
uninit = ext4_ext_is_uninitialized(ext_prev);
data_offset_in_page = seq_start % blocks_per_page;
/*
* Calculate data blocks count that should be swapped
* at the first page.
*/
if (data_offset_in_page + seq_blocks > blocks_per_page) {
/* Swapped blocks are across pages */
block_len_in_page =
blocks_per_page - data_offset_in_page;
} else {
/* Swapped blocks are in a page */
block_len_in_page = seq_blocks;
}
orig_page_offset = seq_start >>
(PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
seq_end_page = (seq_start + seq_blocks - 1) >>
(PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
seq_start = le32_to_cpu(ext_cur->ee_block);
rest_blocks = seq_blocks;
/* Discard preallocations of two inodes */
down_write(&EXT4_I(orig_inode)->i_data_sem);
ext4_discard_preallocations(orig_inode);
up_write(&EXT4_I(orig_inode)->i_data_sem);
down_write(&EXT4_I(donor_inode)->i_data_sem);
ext4_discard_preallocations(donor_inode);
up_write(&EXT4_I(donor_inode)->i_data_sem);
while (orig_page_offset <= seq_end_page) {
/* Swap original branches with new branches */
ret = move_extent_par_page(o_filp, donor_inode,
orig_page_offset,
data_offset_in_page,
block_len_in_page, uninit);
if (ret < 0)
goto out;
orig_page_offset++;
/* Count how many blocks we have exchanged */
*moved_len += block_len_in_page;
BUG_ON(*moved_len > len);
data_offset_in_page = 0;
rest_blocks -= block_len_in_page;
if (rest_blocks > blocks_per_page)
block_len_in_page = blocks_per_page;
else
block_len_in_page = rest_blocks;
}
/* Decrease buffer counter */
if (holecheck_path)
ext4_ext_drop_refs(holecheck_path);
get_ext_path(holecheck_path, orig_inode,
seq_start, ret);
if (holecheck_path == NULL)
break;
depth = holecheck_path->p_depth;
/* Decrease buffer counter */
if (orig_path)
ext4_ext_drop_refs(orig_path);
get_ext_path(orig_path, orig_inode, seq_start, ret);
if (orig_path == NULL)
break;
ext_cur = holecheck_path[depth].p_ext;
add_blocks = ext4_ext_get_actual_len(ext_cur);
seq_blocks = 0;
}
out:
if (orig_path) {
ext4_ext_drop_refs(orig_path);
kfree(orig_path);
}
if (holecheck_path) {
ext4_ext_drop_refs(holecheck_path);
kfree(holecheck_path);
}
out2:
mext_inode_double_unlock(orig_inode, donor_inode);
if (ret)
return ret;
/* All of the specified blocks must be exchanged in succeed */
BUG_ON(*moved_len != len);
return 0;
}
...@@ -1782,7 +1782,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, ...@@ -1782,7 +1782,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
if (IS_DIRSYNC(dir)) if (IS_DIRSYNC(dir))
ext4_handle_sync(handle); ext4_handle_sync(handle);
inode = ext4_new_inode (handle, dir, mode); inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
err = PTR_ERR(inode); err = PTR_ERR(inode);
if (!IS_ERR(inode)) { if (!IS_ERR(inode)) {
inode->i_op = &ext4_file_inode_operations; inode->i_op = &ext4_file_inode_operations;
...@@ -1816,7 +1816,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, ...@@ -1816,7 +1816,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
if (IS_DIRSYNC(dir)) if (IS_DIRSYNC(dir))
ext4_handle_sync(handle); ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, mode); inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
err = PTR_ERR(inode); err = PTR_ERR(inode);
if (!IS_ERR(inode)) { if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev); init_special_inode(inode, inode->i_mode, rdev);
...@@ -1853,7 +1853,8 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) ...@@ -1853,7 +1853,8 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (IS_DIRSYNC(dir)) if (IS_DIRSYNC(dir))
ext4_handle_sync(handle); ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFDIR | mode); inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
&dentry->d_name, 0);
err = PTR_ERR(inode); err = PTR_ERR(inode);
if (IS_ERR(inode)) if (IS_ERR(inode))
goto out_stop; goto out_stop;
...@@ -2264,7 +2265,8 @@ static int ext4_symlink(struct inode *dir, ...@@ -2264,7 +2265,8 @@ static int ext4_symlink(struct inode *dir,
if (IS_DIRSYNC(dir)) if (IS_DIRSYNC(dir))
ext4_handle_sync(handle); ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
&dentry->d_name, 0);
err = PTR_ERR(inode); err = PTR_ERR(inode);
if (IS_ERR(inode)) if (IS_ERR(inode))
goto out_stop; goto out_stop;
......
...@@ -37,7 +37,6 @@ ...@@ -37,7 +37,6 @@
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/marker.h>
#include <linux/log2.h> #include <linux/log2.h>
#include <linux/crc16.h> #include <linux/crc16.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -47,6 +46,9 @@ ...@@ -47,6 +46,9 @@
#include "xattr.h" #include "xattr.h"
#include "acl.h" #include "acl.h"
#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>
static int default_mb_history_length = 1000; static int default_mb_history_length = 1000;
module_param_named(default_mb_history_length, default_mb_history_length, module_param_named(default_mb_history_length, default_mb_history_length,
...@@ -301,7 +303,7 @@ static void ext4_handle_error(struct super_block *sb) ...@@ -301,7 +303,7 @@ static void ext4_handle_error(struct super_block *sb)
if (!test_opt(sb, ERRORS_CONT)) { if (!test_opt(sb, ERRORS_CONT)) {
journal_t *journal = EXT4_SB(sb)->s_journal; journal_t *journal = EXT4_SB(sb)->s_journal;
EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
if (journal) if (journal)
jbd2_journal_abort(journal, -EIO); jbd2_journal_abort(journal, -EIO);
} }
...@@ -414,7 +416,7 @@ void ext4_abort(struct super_block *sb, const char *function, ...@@ -414,7 +416,7 @@ void ext4_abort(struct super_block *sb, const char *function,
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
sb->s_flags |= MS_RDONLY; sb->s_flags |= MS_RDONLY;
EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
if (EXT4_SB(sb)->s_journal) if (EXT4_SB(sb)->s_journal)
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
} }
...@@ -1474,7 +1476,7 @@ static int parse_options(char *options, struct super_block *sb, ...@@ -1474,7 +1476,7 @@ static int parse_options(char *options, struct super_block *sb,
break; break;
#endif #endif
case Opt_abort: case Opt_abort:
set_opt(sbi->s_mount_opt, ABORT); sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
break; break;
case Opt_nobarrier: case Opt_nobarrier:
clear_opt(sbi->s_mount_opt, BARRIER); clear_opt(sbi->s_mount_opt, BARRIER);
...@@ -1653,7 +1655,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, ...@@ -1653,7 +1655,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
ext4_commit_super(sb, 1); ext4_commit_super(sb, 1);
if (test_opt(sb, DEBUG)) if (test_opt(sb, DEBUG))
printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
"bpg=%lu, ipg=%lu, mo=%04lx]\n", "bpg=%lu, ipg=%lu, mo=%04x]\n",
sb->s_blocksize, sb->s_blocksize,
sbi->s_groups_count, sbi->s_groups_count,
EXT4_BLOCKS_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
...@@ -2204,6 +2206,7 @@ EXT4_RO_ATTR(session_write_kbytes); ...@@ -2204,6 +2206,7 @@ EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes); EXT4_RO_ATTR(lifetime_write_kbytes);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
inode_readahead_blks_store, s_inode_readahead_blks); inode_readahead_blks_store, s_inode_readahead_blks);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
...@@ -2216,6 +2219,7 @@ static struct attribute *ext4_attrs[] = { ...@@ -2216,6 +2219,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(session_write_kbytes), ATTR_LIST(session_write_kbytes),
ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(inode_readahead_blks), ATTR_LIST(inode_readahead_blks),
ATTR_LIST(inode_goal),
ATTR_LIST(mb_stats), ATTR_LIST(mb_stats),
ATTR_LIST(mb_max_to_scan), ATTR_LIST(mb_max_to_scan),
ATTR_LIST(mb_min_to_scan), ATTR_LIST(mb_min_to_scan),
...@@ -3346,7 +3350,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) ...@@ -3346,7 +3350,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
int ret = 0; int ret = 0;
tid_t target; tid_t target;
trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); trace_ext4_sync_fs(sb, wait);
if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
if (wait) if (wait)
jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
...@@ -3450,7 +3454,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ...@@ -3450,7 +3454,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts; goto restore_opts;
} }
if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ext4_abort(sb, __func__, "Abort forced by user"); ext4_abort(sb, __func__, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
...@@ -3465,7 +3469,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ...@@ -3465,7 +3469,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
n_blocks_count > ext4_blocks_count(es)) { n_blocks_count > ext4_blocks_count(es)) {
if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
err = -EROFS; err = -EROFS;
goto restore_opts; goto restore_opts;
} }
......
...@@ -20,9 +20,9 @@ ...@@ -20,9 +20,9 @@
#include <linux/time.h> #include <linux/time.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/jbd2.h> #include <linux/jbd2.h>
#include <linux/marker.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <trace/events/jbd2.h>
/* /*
* Unlink a buffer from a transaction checkpoint list. * Unlink a buffer from a transaction checkpoint list.
...@@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) ...@@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
* journal straight away. * journal straight away.
*/ */
result = jbd2_cleanup_journal_tail(journal); result = jbd2_cleanup_journal_tail(journal);
trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", trace_jbd2_checkpoint(journal, result);
journal->j_devname, result);
jbd_debug(1, "cleanup_journal_tail returned %d\n", result); jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
if (result <= 0) if (result <= 0)
return result; return result;
......
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#include <linux/time.h> #include <linux/time.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/jbd2.h> #include <linux/jbd2.h>
#include <linux/marker.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/mm.h> #include <linux/mm.h>
...@@ -26,6 +25,7 @@ ...@@ -26,6 +25,7 @@
#include <linux/writeback.h> #include <linux/writeback.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/bio.h> #include <linux/bio.h>
#include <trace/events/jbd2.h>
/* /*
* Default IO end handler for temporary BJ_IO buffer_heads. * Default IO end handler for temporary BJ_IO buffer_heads.
...@@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal, ...@@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal,
* block allocation with delalloc. We need to write * block allocation with delalloc. We need to write
* only allocated blocks here. * only allocated blocks here.
*/ */
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
err = journal_submit_inode_data_buffers(mapping); err = journal_submit_inode_data_buffers(mapping);
if (!ret) if (!ret)
ret = err; ret = err;
...@@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
commit_transaction = journal->j_running_transaction; commit_transaction = journal->j_running_transaction;
J_ASSERT(commit_transaction->t_state == T_RUNNING); J_ASSERT(commit_transaction->t_state == T_RUNNING);
trace_mark(jbd2_start_commit, "dev %s transaction %d", trace_jbd2_start_commit(journal, commit_transaction);
journal->j_devname, commit_transaction->t_tid);
jbd_debug(1, "JBD: starting commit of transaction %d\n", jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid); commit_transaction->t_tid);
...@@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/ */
if (commit_transaction->t_synchronous_commit) if (commit_transaction->t_synchronous_commit)
write_op = WRITE_SYNC_PLUG; write_op = WRITE_SYNC_PLUG;
trace_jbd2_commit_locking(journal, commit_transaction);
stats.u.run.rs_wait = commit_transaction->t_max_wait; stats.u.run.rs_wait = commit_transaction->t_max_wait;
stats.u.run.rs_locked = jiffies; stats.u.run.rs_locked = jiffies;
stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
...@@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/ */
jbd2_journal_switch_revoke_table(journal); jbd2_journal_switch_revoke_table(journal);
trace_jbd2_commit_flushing(journal, commit_transaction);
stats.u.run.rs_flushing = jiffies; stats.u.run.rs_flushing = jiffies;
stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
stats.u.run.rs_flushing); stats.u.run.rs_flushing);
...@@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
commit_transaction->t_state = T_COMMIT; commit_transaction->t_state = T_COMMIT;
spin_unlock(&journal->j_state_lock); spin_unlock(&journal->j_state_lock);
trace_jbd2_commit_logging(journal, commit_transaction);
stats.u.run.rs_logging = jiffies; stats.u.run.rs_logging = jiffies;
stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
stats.u.run.rs_logging); stats.u.run.rs_logging);
...@@ -1054,9 +1057,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -1054,9 +1057,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (journal->j_commit_callback) if (journal->j_commit_callback)
journal->j_commit_callback(journal, commit_transaction); journal->j_commit_callback(journal, commit_transaction);
trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", trace_jbd2_end_commit(journal, commit_transaction);
journal->j_devname, commit_transaction->t_tid,
journal->j_tail_sequence);
jbd_debug(1, "JBD: commit %d complete, head %d\n", jbd_debug(1, "JBD: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence); journal->j_commit_sequence, journal->j_tail_sequence);
if (to_free) if (to_free)
......
...@@ -38,6 +38,10 @@ ...@@ -38,6 +38,10 @@
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/math64.h> #include <linux/math64.h>
#include <linux/hash.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/page.h> #include <asm/page.h>
...@@ -2377,6 +2381,71 @@ static void __exit journal_exit(void) ...@@ -2377,6 +2381,71 @@ static void __exit journal_exit(void)
jbd2_journal_destroy_caches(); jbd2_journal_destroy_caches();
} }
/*
* jbd2_dev_to_name is a utility function used by the jbd2 and ext4
* tracing infrastructure to map a dev_t to a device name.
*
* The caller should use rcu_read_lock() in order to make sure the
* device name stays valid until its done with it. We use
* rcu_read_lock() as well to make sure we're safe in case the caller
* gets sloppy, and because rcu_read_lock() is cheap and can be safely
* nested.
*/
struct devname_cache {
struct rcu_head rcu;
dev_t device;
char devname[BDEVNAME_SIZE];
};
#define CACHE_SIZE_BITS 6
static struct devname_cache *devcache[1 << CACHE_SIZE_BITS];
static DEFINE_SPINLOCK(devname_cache_lock);
static void free_devcache(struct rcu_head *rcu)
{
kfree(rcu);
}
const char *jbd2_dev_to_name(dev_t device)
{
int i = hash_32(device, CACHE_SIZE_BITS);
char *ret;
struct block_device *bd;
rcu_read_lock();
if (devcache[i] && devcache[i]->device == device) {
ret = devcache[i]->devname;
rcu_read_unlock();
return ret;
}
rcu_read_unlock();
spin_lock(&devname_cache_lock);
if (devcache[i]) {
if (devcache[i]->device == device) {
ret = devcache[i]->devname;
spin_unlock(&devname_cache_lock);
return ret;
}
call_rcu(&devcache[i]->rcu, free_devcache);
}
devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
if (!devcache[i]) {
spin_unlock(&devname_cache_lock);
return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
}
devcache[i]->device = device;
bd = bdget(device);
if (bd) {
bdevname(bd, devcache[i]->devname);
bdput(bd);
} else
__bdevname(device, devcache[i]->devname);
ret = devcache[i]->devname;
spin_unlock(&devname_cache_lock);
return ret;
}
EXPORT_SYMBOL(jbd2_dev_to_name);
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
module_init(journal_init); module_init(journal_init);
module_exit(journal_exit); module_exit(journal_exit);
......
...@@ -1547,36 +1547,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) ...@@ -1547,36 +1547,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
return; return;
} }
/*
* jbd2_journal_try_to_free_buffers() could race with
* jbd2_journal_commit_transaction(). The later might still hold the
* reference count to the buffers when inspecting them on
* t_syncdata_list or t_locked_list.
*
* jbd2_journal_try_to_free_buffers() will call this function to
* wait for the current transaction to finish syncing data buffers, before
* try to free that buffer.
*
* Called with journal->j_state_lock hold.
*/
static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
{
transaction_t *transaction;
tid_t tid;
spin_lock(&journal->j_state_lock);
transaction = journal->j_committing_transaction;
if (!transaction) {
spin_unlock(&journal->j_state_lock);
return;
}
tid = transaction->t_tid;
spin_unlock(&journal->j_state_lock);
jbd2_log_wait_commit(journal, tid);
}
/** /**
* int jbd2_journal_try_to_free_buffers() - try to free page buffers. * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation * @journal: journal for operation
...@@ -1649,25 +1619,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, ...@@ -1649,25 +1619,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
ret = try_to_free_buffers(page); ret = try_to_free_buffers(page);
/*
* There are a number of places where jbd2_journal_try_to_free_buffers()
* could race with jbd2_journal_commit_transaction(), the later still
* holds the reference to the buffers to free while processing them.
* try_to_free_buffers() failed to free those buffers. Some of the
* caller of releasepage() request page buffers to be dropped, otherwise
* treat the fail-to-free as errors (such as generic_file_direct_IO())
*
* So, if the caller of try_to_release_page() wants the synchronous
* behaviour(i.e make sure buffers are dropped upon return),
* let's wait for the current transaction to finish flush of
* dirty data buffers, then try to free those buffers again,
* with the journal locked.
*/
if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
jbd2_journal_wait_for_transaction_sync_data(journal);
ret = try_to_free_buffers(page);
}
busy: busy:
return ret; return ret;
} }
......
...@@ -1315,6 +1315,12 @@ extern int jbd_blocks_per_page(struct inode *inode); ...@@ -1315,6 +1315,12 @@ extern int jbd_blocks_per_page(struct inode *inode);
#define BUFFER_TRACE2(bh, bh2, info) do {} while (0) #define BUFFER_TRACE2(bh, bh2, info) do {} while (0)
#define JBUFFER_TRACE(jh, info) do {} while (0) #define JBUFFER_TRACE(jh, info) do {} while (0)
/*
* jbd2_dev_to_name is a utility function used by the jbd2 and ext4
* tracing infrastructure to map a dev_t to a device name.
*/
extern const char *jbd2_dev_to_name(dev_t device);
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_JBD2_H */ #endif /* _LINUX_JBD2_H */
#if !defined(_TRACE_EXT4_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_EXT4_H
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ext4
#include <linux/writeback.h>
#include "../../../fs/ext4/ext4.h"
#include "../../../fs/ext4/mballoc.h"
#include <linux/tracepoint.h>
TRACE_EVENT(ext4_free_inode,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( uid_t, uid )
__field( gid_t, gid )
__field( blkcnt_t, blocks )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->uid = inode->i_uid;
__entry->gid = inode->i_gid;
__entry->blocks = inode->i_blocks;
),
TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode,
__entry->uid, __entry->gid, __entry->blocks)
);
TRACE_EVENT(ext4_request_inode,
TP_PROTO(struct inode *dir, int mode),
TP_ARGS(dir, mode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
TP_printk("dev %s dir %lu mode %d",
jbd2_dev_to_name(__entry->dev), __entry->dir, __entry->mode)
);
TRACE_EVENT(ext4_allocate_inode,
TP_PROTO(struct inode *inode, struct inode *dir, int mode),
TP_ARGS(inode, dir, mode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
TP_printk("dev %s ino %lu dir %lu mode %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->dir, __entry->mode)
);
TRACE_EVENT(ext4_write_begin,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int flags),
TP_ARGS(inode, pos, len, flags),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, flags )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->flags = flags;
),
TP_printk("dev %s ino %lu pos %llu len %u flags %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->flags)
);
TRACE_EVENT(ext4_ordered_write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, copied )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->copied = copied;
),
TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->copied)
);
TRACE_EVENT(ext4_writeback_write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, copied )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->copied = copied;
),
TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->copied)
);
TRACE_EVENT(ext4_journalled_write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, copied )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->copied = copied;
),
TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->copied)
);
TRACE_EVENT(ext4_da_writepage,
TP_PROTO(struct inode *inode, struct page *page),
TP_ARGS(inode, page),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( pgoff_t, index )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->index = page->index;
),
TP_printk("dev %s ino %lu page_index %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
);
TRACE_EVENT(ext4_da_writepages,
TP_PROTO(struct inode *inode, struct writeback_control *wbc),
TP_ARGS(inode, wbc),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( long, nr_to_write )
__field( long, pages_skipped )
__field( loff_t, range_start )
__field( loff_t, range_end )
__field( char, nonblocking )
__field( char, for_kupdate )
__field( char, for_reclaim )
__field( char, for_writepages )
__field( char, range_cyclic )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
__entry->range_start = wbc->range_start;
__entry->range_end = wbc->range_end;
__entry->nonblocking = wbc->nonblocking;
__entry->for_kupdate = wbc->for_kupdate;
__entry->for_reclaim = wbc->for_reclaim;
__entry->for_writepages = wbc->for_writepages;
__entry->range_cyclic = wbc->range_cyclic;
),
TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d for_writepages %d range_cyclic %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write,
__entry->pages_skipped, __entry->range_start,
__entry->range_end, __entry->nonblocking,
__entry->for_kupdate, __entry->for_reclaim,
__entry->for_writepages, __entry->range_cyclic)
);
TRACE_EVENT(ext4_da_writepages_result,
TP_PROTO(struct inode *inode, struct writeback_control *wbc,
int ret, int pages_written),
TP_ARGS(inode, wbc, ret, pages_written),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( int, ret )
__field( int, pages_written )
__field( long, pages_skipped )
__field( char, encountered_congestion )
__field( char, more_io )
__field( char, no_nrwrite_index_update )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->ret = ret;
__entry->pages_written = pages_written;
__entry->pages_skipped = wbc->pages_skipped;
__entry->encountered_congestion = wbc->encountered_congestion;
__entry->more_io = wbc->more_io;
__entry->no_nrwrite_index_update = wbc->no_nrwrite_index_update;
),
TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->ret,
__entry->pages_written, __entry->pages_skipped,
__entry->encountered_congestion, __entry->more_io,
__entry->no_nrwrite_index_update)
);
TRACE_EVENT(ext4_da_write_begin,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int flags),
TP_ARGS(inode, pos, len, flags),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, flags )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->flags = flags;
),
TP_printk("dev %s ino %lu pos %llu len %u flags %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->flags)
);
TRACE_EVENT(ext4_da_write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, copied )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->copied = copied;
),
TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->copied)
);
TRACE_EVENT(ext4_normal_writepage,
TP_PROTO(struct inode *inode, struct page *page),
TP_ARGS(inode, page),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( pgoff_t, index )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->index = page->index;
),
TP_printk("dev %s ino %lu page_index %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
);
TRACE_EVENT(ext4_journalled_writepage,
TP_PROTO(struct inode *inode, struct page *page),
TP_ARGS(inode, page),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( pgoff_t, index )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->index = page->index;
),
TP_printk("dev %s ino %lu page_index %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
);
TRACE_EVENT(ext4_discard_blocks,
TP_PROTO(struct super_block *sb, unsigned long long blk,
unsigned long long count),
TP_ARGS(sb, blk, count),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( __u64, blk )
__field( __u64, count )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->blk = blk;
__entry->count = count;
),
TP_printk("dev %s blk %llu count %llu",
jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count)
);
TRACE_EVENT(ext4_mb_new_inode_pa,
TP_PROTO(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa),
TP_ARGS(ac, pa),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
__field( __u64, pa_lstart )
),
TP_fast_assign(
__entry->dev = ac->ac_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
__entry->pa_lstart = pa->pa_lstart;
),
TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart,
__entry->pa_len, __entry->pa_lstart)
);
TRACE_EVENT(ext4_mb_new_group_pa,
TP_PROTO(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa),
TP_ARGS(ac, pa),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
__field( __u64, pa_lstart )
),
TP_fast_assign(
__entry->dev = ac->ac_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
__entry->pa_lstart = pa->pa_lstart;
),
TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart,
__entry->pa_len, __entry->pa_lstart)
);
TRACE_EVENT(ext4_mb_release_inode_pa,
TP_PROTO(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa,
unsigned long long block, unsigned int count),
TP_ARGS(ac, pa, block, count),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, block )
__field( __u32, count )
),
TP_fast_assign(
__entry->dev = ac->ac_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->block = block;
__entry->count = count;
),
TP_printk("dev %s ino %lu block %llu count %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block,
__entry->count)
);
TRACE_EVENT(ext4_mb_release_group_pa,
TP_PROTO(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa),
TP_ARGS(ac, pa),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
),
TP_fast_assign(
__entry->dev = ac->ac_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
),
TP_printk("dev %s pstart %llu len %u",
jbd2_dev_to_name(__entry->dev), __entry->pa_pstart, __entry->pa_len)
);
TRACE_EVENT(ext4_discard_preallocations,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
),
TP_printk("dev %s ino %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino)
);
TRACE_EVENT(ext4_mb_discard_preallocations,
TP_PROTO(struct super_block *sb, int needed),
TP_ARGS(sb, needed),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, needed )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->needed = needed;
),
TP_printk("dev %s needed %d",
jbd2_dev_to_name(__entry->dev), __entry->needed)
);
TRACE_EVENT(ext4_request_blocks,
TP_PROTO(struct ext4_allocation_request *ar),
TP_ARGS(ar),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( unsigned int, flags )
__field( unsigned int, len )
__field( __u64, logical )
__field( __u64, goal )
__field( __u64, lleft )
__field( __u64, lright )
__field( __u64, pleft )
__field( __u64, pright )
),
TP_fast_assign(
__entry->dev = ar->inode->i_sb->s_dev;
__entry->ino = ar->inode->i_ino;
__entry->flags = ar->flags;
__entry->len = ar->len;
__entry->logical = ar->logical;
__entry->goal = ar->goal;
__entry->lleft = ar->lleft;
__entry->lright = ar->lright;
__entry->pleft = ar->pleft;
__entry->pright = ar->pright;
),
TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags,
__entry->len,
(unsigned long long) __entry->logical,
(unsigned long long) __entry->goal,
(unsigned long long) __entry->lleft,
(unsigned long long) __entry->lright,
(unsigned long long) __entry->pleft,
(unsigned long long) __entry->pright)
);
TRACE_EVENT(ext4_allocate_blocks,
TP_PROTO(struct ext4_allocation_request *ar, unsigned long long block),
TP_ARGS(ar, block),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, block )
__field( unsigned int, flags )
__field( unsigned int, len )
__field( __u64, logical )
__field( __u64, goal )
__field( __u64, lleft )
__field( __u64, lright )
__field( __u64, pleft )
__field( __u64, pright )
),
TP_fast_assign(
__entry->dev = ar->inode->i_sb->s_dev;
__entry->ino = ar->inode->i_ino;
__entry->block = block;
__entry->flags = ar->flags;
__entry->len = ar->len;
__entry->logical = ar->logical;
__entry->goal = ar->goal;
__entry->lleft = ar->lleft;
__entry->lright = ar->lright;
__entry->pleft = ar->pleft;
__entry->pright = ar->pright;
),
TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags,
__entry->len, __entry->block,
(unsigned long long) __entry->logical,
(unsigned long long) __entry->goal,
(unsigned long long) __entry->lleft,
(unsigned long long) __entry->lright,
(unsigned long long) __entry->pleft,
(unsigned long long) __entry->pright)
);
TRACE_EVENT(ext4_free_blocks,
TP_PROTO(struct inode *inode, __u64 block, unsigned long count,
int metadata),
TP_ARGS(inode, block, count, metadata),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, block )
__field( unsigned long, count )
__field( int, metadata )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->block = block;
__entry->count = count;
__entry->metadata = metadata;
),
TP_printk("dev %s ino %lu block %llu count %lu metadata %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block,
__entry->count, __entry->metadata)
);
TRACE_EVENT(ext4_sync_file,
TP_PROTO(struct file *file, struct dentry *dentry, int datasync),
TP_ARGS(file, dentry, datasync),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ino_t, parent )
__field( int, datasync )
),
TP_fast_assign(
__entry->dev = dentry->d_inode->i_sb->s_dev;
__entry->ino = dentry->d_inode->i_ino;
__entry->datasync = datasync;
__entry->parent = dentry->d_parent->d_inode->i_ino;
),
TP_printk("dev %s ino %ld parent %ld datasync %d ",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->parent,
__entry->datasync)
);
TRACE_EVENT(ext4_sync_fs,
TP_PROTO(struct super_block *sb, int wait),
TP_ARGS(sb, wait),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, wait )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->wait = wait;
),
TP_printk("dev %s wait %d", jbd2_dev_to_name(__entry->dev),
__entry->wait)
);
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
#if !defined(_TRACE_JBD2_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_JBD2_H
#include <linux/jbd2.h>
#include <linux/tracepoint.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM jbd2
TRACE_EVENT(jbd2_checkpoint,
TP_PROTO(journal_t *journal, int result),
TP_ARGS(journal, result),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, result )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->result = result;
),
TP_printk("dev %s result %d",
jbd2_dev_to_name(__entry->dev), __entry->result)
);
TRACE_EVENT(jbd2_start_commit,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %s transaction %d sync %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit)
);
TRACE_EVENT(jbd2_commit_locking,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %s transaction %d sync %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit)
);
TRACE_EVENT(jbd2_commit_flushing,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %s transaction %d sync %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit)
);
TRACE_EVENT(jbd2_commit_logging,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %s transaction %d sync %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit)
);
TRACE_EVENT(jbd2_end_commit,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
__field( int, head )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
__entry->head = journal->j_tail_sequence;
),
TP_printk("dev %s transaction %d sync %d head %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit, __entry->head)
);
TRACE_EVENT(jbd2_submit_inode_data,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
),
TP_printk("dev %s ino %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino)
);
#endif /* _TRACE_JBD2_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment