Commit 77115225 authored by Amir Goldstein, committed by Jan Kara

fanotify: cache fsid in fsnotify_mark_connector

For FAN_REPORT_FID, we need to encode fid with the fsid of the filesystem on
every event. To avoid having to call vfs_statfs() on every event to get the
fsid, we store the fsid in fsnotify_mark_connector the first time we add a
mark to an object and use the cached fsid when handling an event.

Subsequent calls to add a mark to the same object are expected to pass the
same fsid, so adding a mark will fail on a cached fsid mismatch.

If an event is reported on several mark types (inode, mount, filesystem),
all connectors should already have the same fsid, so we use the cached
fsid from the first connector.

[JK: Simplify code flow around fanotify_get_fid()
     make fsid argument of fsnotify_add_mark_locked() unconditional]
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
parent a8b13aa2
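
In outline: the connector caches the filesystem's fsid when the first mark is added to an object, later mark additions must pass a matching fsid or they fail, and event handling reads the cached value instead of calling vfs_statfs() per event. The following standalone C model of that flow is illustrative only; the struct and helper names are invented here and are not the kernel's:

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-ins for the kernel types, not the real definitions */
struct fsid { int val[2]; };

struct connector {
	int has_marks;
	struct fsid fsid;	/* cached on first mark, like fsnotify_mark_connector */
};

/* Adding a mark: cache the fsid on first use, reject a later mismatch */
static int add_mark(struct connector *conn, const struct fsid *fsid)
{
	if (!conn->has_marks) {
		conn->fsid = *fsid;	/* first mark caches the fsid */
		conn->has_marks = 1;
		return 0;
	}
	if (memcmp(&conn->fsid, fsid, sizeof(*fsid)))
		return -EXDEV;		/* subsequent marks must match */
	return 0;
}

/* Handling an event: use the cached fsid, no statfs() per event */
static struct fsid get_cached_fsid(const struct connector *conn)
{
	return conn->fsid;
}

int main(void)
{
	struct connector conn = { 0 };
	struct fsid a = { { 0x1234, 0x5678 } };
	struct fsid b = { { 0xdead, 0xbeef } };
	struct fsid f;

	printf("first add:  %d\n", add_mark(&conn, &a));	/* 0: fsid cached */
	printf("same fsid:  %d\n", add_mark(&conn, &a));	/* 0: matches cache */
	printf("other fsid: %d\n", add_mark(&conn, &b));	/* -EXDEV: mismatch */

	f = get_cached_fsid(&conn);
	printf("event fsid: %x.%x\n", f.val[0], f.val[1]);
	return 0;
}

The real change keeps the cached value in struct fsnotify_mark_connector and does the mismatch check in fsnotify_add_mark_list(), as the hunks below show.
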
@@ -153,26 +153,20 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info,
}
static int fanotify_encode_fid(struct fanotify_event *event,
const struct path *path, gfp_t gfp)
struct inode *inode, gfp_t gfp,
__kernel_fsid_t *fsid)
{
struct fanotify_fid *fid = &event->fid;
int dwords, bytes = 0;
struct kstatfs stat;
int err, type;
stat.f_fsid.val[0] = stat.f_fsid.val[1] = 0;
fid->ext_fh = NULL;
dwords = 0;
err = -ENOENT;
type = exportfs_encode_inode_fh(d_inode(path->dentry), NULL, &dwords,
NULL);
type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
if (!dwords)
goto out_err;
err = vfs_statfs(path, &stat);
if (err)
goto out_err;
bytes = dwords << 2;
if (bytes > FANOTIFY_INLINE_FH_LEN) {
/* Treat failure to allocate fh as failure to allocate event */
@@ -182,14 +176,13 @@ static int fanotify_encode_fid(struct fanotify_event *event,
goto out_err;
}
type = exportfs_encode_inode_fh(d_inode(path->dentry),
fanotify_fid_fh(fid, bytes), &dwords,
NULL);
type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes),
&dwords, NULL);
err = -EINVAL;
if (!type || type == FILEID_INVALID || bytes != dwords << 2)
goto out_err;
fid->fsid = stat.f_fsid;
fid->fsid = *fsid;
event->fh_len = bytes;
return type;
@@ -197,8 +190,7 @@ static int fanotify_encode_fid(struct fanotify_event *event,
out_err:
pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, "
"type=%d, bytes=%d, err=%i)\n",
stat.f_fsid.val[0], stat.f_fsid.val[1],
type, bytes, err);
fsid->val[0], fsid->val[1], type, bytes, err);
kfree(fid->ext_fh);
fid->ext_fh = NULL;
event->fh_len = 0;
@@ -207,8 +199,9 @@ static int fanotify_encode_fid(struct fanotify_event *event,
}
struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
struct inode *inode, u32 mask,
const struct path *path)
struct inode *inode, u32 mask,
const struct path *path,
__kernel_fsid_t *fsid)
{
struct fanotify_event *event = NULL;
gfp_t gfp = GFP_KERNEL_ACCOUNT;
@@ -247,7 +240,8 @@ init: __maybe_unused
event->fh_len = 0;
if (path && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
/* Report the event without a file identifier on encode error */
event->fh_type = fanotify_encode_fid(event, path, gfp);
event->fh_type = fanotify_encode_fid(event,
d_inode(path->dentry), gfp, fsid);
} else if (path) {
event->fh_type = FILEID_ROOT;
event->path = *path;
@@ -262,6 +256,29 @@ init: __maybe_unused
return event;
}
/*
* Get cached fsid of the filesystem containing the object from any connector.
* All connectors are supposed to have the same fsid, but we do not verify that
* here.
*/
static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
{
int type;
__kernel_fsid_t fsid = {};
fsnotify_foreach_obj_type(type) {
if (!fsnotify_iter_should_report_type(iter_info, type))
continue;
fsid = iter_info->marks[type]->connector->fsid;
if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
continue;
return fsid;
}
return fsid;
}
static int fanotify_handle_event(struct fsnotify_group *group,
struct inode *inode,
u32 mask, const void *data, int data_type,
@@ -271,6 +288,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
int ret = 0;
struct fanotify_event *event;
struct fsnotify_event *fsn_event;
__kernel_fsid_t fsid = {};
BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
@@ -303,7 +321,10 @@ static int fanotify_handle_event(struct fsnotify_group *group,
return 0;
}
event = fanotify_alloc_event(group, inode, mask, data);
if (FAN_GROUP_FLAG(group, FAN_REPORT_FID))
fsid = fanotify_get_fsid(iter_info);
event = fanotify_alloc_event(group, inode, mask, data, &fsid);
ret = -ENOMEM;
if (unlikely(!event)) {
/*
@@ -131,5 +131,6 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
}
struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
struct inode *inode, u32 mask,
const struct path *path);
struct inode *inode, u32 mask,
const struct path *path,
__kernel_fsid_t *fsid);
@@ -653,7 +653,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp,
unsigned int type)
unsigned int type,
__kernel_fsid_t *fsid)
{
struct fsnotify_mark *mark;
int ret;
@@ -666,7 +667,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
return ERR_PTR(-ENOMEM);
fsnotify_init_mark(mark, group);
ret = fsnotify_add_mark_locked(mark, connp, type, 0);
ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
if (ret) {
fsnotify_put_mark(mark);
return ERR_PTR(ret);
@@ -678,7 +679,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
static int fanotify_add_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp, unsigned int type,
__u32 mask, unsigned int flags)
__u32 mask, unsigned int flags,
__kernel_fsid_t *fsid)
{
struct fsnotify_mark *fsn_mark;
__u32 added;
@@ -686,7 +688,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
mutex_lock(&group->mark_mutex);
fsn_mark = fsnotify_find_mark(connp, group);
if (!fsn_mark) {
fsn_mark = fanotify_add_new_mark(group, connp, type);
fsn_mark = fanotify_add_new_mark(group, connp, type, fsid);
if (IS_ERR(fsn_mark)) {
mutex_unlock(&group->mark_mutex);
return PTR_ERR(fsn_mark);
@@ -703,23 +705,23 @@ static int fanotify_add_mark(struct fsnotify_group *group,
static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
struct vfsmount *mnt, __u32 mask,
unsigned int flags)
unsigned int flags, __kernel_fsid_t *fsid)
{
return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags);
FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
}
static int fanotify_add_sb_mark(struct fsnotify_group *group,
struct super_block *sb, __u32 mask,
unsigned int flags)
struct super_block *sb, __u32 mask,
unsigned int flags, __kernel_fsid_t *fsid)
{
return fanotify_add_mark(group, &sb->s_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_SB, mask, flags);
FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
}
static int fanotify_add_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
unsigned int flags)
unsigned int flags, __kernel_fsid_t *fsid)
{
pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
@@ -734,7 +736,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
return 0;
return fanotify_add_mark(group, &inode->i_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_INODE, mask, flags);
FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
}
/* fanotify syscalls */
@@ -798,7 +800,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
atomic_inc(&user->fanotify_listeners);
group->memcg = get_mem_cgroup_from_mm(current->mm);
oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL);
oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL, NULL);
if (unlikely(!oevent)) {
fd = -ENOMEM;
goto out_destroy_group;
@@ -861,9 +863,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
}
/* Check if filesystem can encode a unique fid */
static int fanotify_test_fid(struct path *path)
static int fanotify_test_fid(struct path *path, struct kstatfs *stat)
{
struct kstatfs stat, root_stat;
struct kstatfs root_stat;
struct path root = {
.mnt = path->mnt,
.dentry = path->dentry->d_sb->s_root,
@@ -873,11 +875,11 @@ static int fanotify_test_fid(struct path *path)
/*
* Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
*/
err = vfs_statfs(path, &stat);
err = vfs_statfs(path, stat);
if (err)
return err;
if (!stat.f_fsid.val[0] && !stat.f_fsid.val[1])
if (!stat->f_fsid.val[0] && !stat->f_fsid.val[1])
return -ENODEV;
/*
@@ -888,8 +890,8 @@ static int fanotify_test_fid(struct path *path)
if (err)
return err;
if (root_stat.f_fsid.val[0] != stat.f_fsid.val[0] ||
root_stat.f_fsid.val[1] != stat.f_fsid.val[1])
if (root_stat.f_fsid.val[0] != stat->f_fsid.val[0] ||
root_stat.f_fsid.val[1] != stat->f_fsid.val[1])
return -EXDEV;
/*
@@ -914,6 +916,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
struct fsnotify_group *group;
struct fd f;
struct path path;
struct kstatfs stat;
__kernel_fsid_t *fsid = NULL;
u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
int ret;
@@ -992,9 +996,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
goto fput_and_out;
if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
ret = fanotify_test_fid(&path);
ret = fanotify_test_fid(&path, &stat);
if (ret)
goto path_put_and_out;
fsid = &stat.f_fsid;
}
/* inode held in place by reference to path; group by fget on fd */
@@ -1007,19 +1013,25 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
case FAN_MARK_ADD:
if (mark_type == FAN_MARK_MOUNT)
ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
ret = fanotify_add_vfsmount_mark(group, mnt, mask,
flags, fsid);
else if (mark_type == FAN_MARK_FILESYSTEM)
ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, flags);
ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask,
flags, fsid);
else
ret = fanotify_add_inode_mark(group, inode, mask, flags);
ret = fanotify_add_inode_mark(group, inode, mask,
flags, fsid);
break;
case FAN_MARK_REMOVE:
if (mark_type == FAN_MARK_MOUNT)
ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
flags);
else if (mark_type == FAN_MARK_FILESYSTEM)
ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, flags);
ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
flags);
else
ret = fanotify_remove_inode_mark(group, inode, mask, flags);
ret = fanotify_remove_inode_mark(group, inode, mask,
flags);
break;
default:
ret = -EINVAL;
@@ -82,6 +82,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
#include <linux/ratelimit.h>
#include <linux/atomic.h>
@@ -481,7 +482,8 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
}
static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
unsigned int type)
unsigned int type,
__kernel_fsid_t *fsid)
{
struct inode *inode = NULL;
struct fsnotify_mark_connector *conn;
@@ -493,6 +495,11 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
INIT_HLIST_HEAD(&conn->list);
conn->type = type;
conn->obj = connp;
/* Cache fsid of filesystem containing the object */
if (fsid)
conn->fsid = *fsid;
else
conn->fsid.val[0] = conn->fsid.val[1] = 0;
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
inode = igrab(fsnotify_conn_inode(conn));
/*
@@ -544,7 +551,7 @@ static struct fsnotify_mark_connector *fsnotify_grab_connector(
*/
static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int type,
int allow_dups)
int allow_dups, __kernel_fsid_t *fsid)
{
struct fsnotify_mark *lmark, *last = NULL;
struct fsnotify_mark_connector *conn;
@@ -553,15 +560,36 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
if (WARN_ON(!fsnotify_valid_obj_type(type)))
return -EINVAL;
/* Backend is expected to check for zero fsid (e.g. tmpfs) */
if (fsid && WARN_ON_ONCE(!fsid->val[0] && !fsid->val[1]))
return -ENODEV;
restart:
spin_lock(&mark->lock);
conn = fsnotify_grab_connector(connp);
if (!conn) {
spin_unlock(&mark->lock);
err = fsnotify_attach_connector_to_object(connp, type);
err = fsnotify_attach_connector_to_object(connp, type, fsid);
if (err)
return err;
goto restart;
} else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) &&
(fsid->val[0] != conn->fsid.val[0] ||
fsid->val[1] != conn->fsid.val[1])) {
/*
* Backend is expected to check for non uniform fsid
* (e.g. btrfs), but maybe we missed something?
* Only allow setting conn->fsid once to non zero fsid.
* inotify and non-fid fanotify groups do not set nor test
* conn->fsid.
*/
pr_warn_ratelimited("%s: fsid mismatch on object of type %u: "
"%x.%x != %x.%x\n", __func__, conn->type,
fsid->val[0], fsid->val[1],
conn->fsid.val[0], conn->fsid.val[1]);
err = -EXDEV;
goto out_err;
}
/* is mark the first mark? */
@@ -606,7 +634,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
*/
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int type,
int allow_dups)
int allow_dups, __kernel_fsid_t *fsid)
{
struct fsnotify_group *group = mark->group;
int ret = 0;
@@ -627,7 +655,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_get_mark(mark); /* for g_list */
spin_unlock(&mark->lock);
ret = fsnotify_add_mark_list(mark, connp, type, allow_dups);
ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid);
if (ret)
goto err;
@@ -648,13 +676,13 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
}
int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
unsigned int type, int allow_dups)
unsigned int type, int allow_dups, __kernel_fsid_t *fsid)
{
int ret;
struct fsnotify_group *group = mark->group;
mutex_lock(&group->mark_mutex);
ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups);
ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups, fsid);
mutex_unlock(&group->mark_mutex);
return ret;
}
@@ -293,6 +293,7 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
struct fsnotify_mark_connector {
spinlock_t lock;
unsigned int type; /* Type of object [lock] */
__kernel_fsid_t fsid; /* fsid of filesystem containing object */
union {
/* Object pointer [lock] */
fsnotify_connp_t *obj;
@@ -433,28 +434,35 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark,
/* Find mark belonging to given group in the list of marks */
extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
struct fsnotify_group *group);
/* Get cached fsid of filesystem containing object */
extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn,
__kernel_fsid_t *fsid);
/* attach the mark to the object */
extern int fsnotify_add_mark(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int type,
int allow_dups);
int allow_dups, __kernel_fsid_t *fsid);
extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int type,
int allow_dups);
fsnotify_connp_t *connp,
unsigned int type, int allow_dups,
__kernel_fsid_t *fsid);
/* attach the mark to the inode */
static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct inode *inode,
int allow_dups)
{
return fsnotify_add_mark(mark, &inode->i_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_INODE, allow_dups);
FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL);
}
static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark,
struct inode *inode,
int allow_dups)
{
return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks,
FSNOTIFY_OBJ_TYPE_INODE, allow_dups);
FSNOTIFY_OBJ_TYPE_INODE, allow_dups,
NULL);
}
/* given a group and a mark, flag mark to be freed when all references are dropped */
extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group);
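
For the consumer side of FAN_REPORT_FID, here is a minimal userspace sketch of a listener that reads the fsid which, after this change, comes from the cached connector value rather than a per-event vfs_statfs(). It is only a sketch: it assumes kernel headers new enough to define FAN_REPORT_FID and struct fanotify_event_info_fid, needs CAP_SYS_ADMIN to run, and keeps error handling minimal:

#include <fcntl.h>
#include <stdio.h>
#include <sys/fanotify.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	/* keep the read buffer aligned for struct fanotify_event_metadata */
	char buf[8192] __attribute__((aligned(8)));
	ssize_t len;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <path>\n", argv[0]);
		return 1;
	}

	/* FID mode: events carry a file handle and fsid instead of an open fd */
	fd = fanotify_init(FAN_CLASS_NOTIF | FAN_REPORT_FID, 0);
	if (fd < 0) {
		perror("fanotify_init");
		return 1;
	}

	/* Watch the whole filesystem containing <path> for open events */
	if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
			  FAN_OPEN, AT_FDCWD, argv[1]) < 0) {
		perror("fanotify_mark");
		return 1;
	}

	while ((len = read(fd, buf, sizeof(buf))) > 0) {
		struct fanotify_event_metadata *md = (void *)buf;

		for (; FAN_EVENT_OK(md, len); md = FAN_EVENT_NEXT(md, len)) {
			struct fanotify_event_info_fid *fid = (void *)(md + 1);

			if (fid->hdr.info_type != FAN_EVENT_INFO_TYPE_FID)
				continue;
			/* fsid here is the value cached in the mark connector */
			printf("mask 0x%llx fsid %x.%x\n",
			       (unsigned long long)md->mask,
			       fid->fsid.val[0], fid->fsid.val[1]);
		}
	}
	close(fd);
	return 0;
}
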