Commit 3644286f authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'fsnotify_for_v5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:

 - support for limited fanotify functionality for unpriviledged users

 - faster merging of fanotify events

 - a few smaller fsnotify improvements

* tag 'fsnotify_for_v5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  shmem: allow reporting fanotify events with file handles on tmpfs
  fs: introduce a wrapper uuid_to_fsid()
  fanotify_user: use upper_32_bits() to verify mask
  fanotify: support limited functionality for unprivileged users
  fanotify: configurable limits via sysfs
  fanotify: limit number of event merge attempts
  fsnotify: use hash table for faster events merge
  fanotify: mix event info and pid into merge key hash
  fanotify: reduce event objectid to 29-bit hash
  fsnotify: allow fsnotify_{peek,remove}_first_event with empty queue
parents 767fcbc8 59cda49e
......@@ -1399,7 +1399,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
struct super_block *sb = dentry->d_sb;
struct ext2_sb_info *sbi = EXT2_SB(sb);
struct ext2_super_block *es = sbi->s_es;
u64 fsid;
spin_lock(&sbi->s_lock);
......@@ -1453,9 +1452,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
buf->f_ffree = ext2_count_free_inodes(sb);
es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
buf->f_namelen = EXT2_NAME_LEN;
fsid = le64_to_cpup((void *)es->s_uuid) ^
le64_to_cpup((void *)es->s_uuid + sizeof(u64));
buf->f_fsid = u64_to_fsid(fsid);
buf->f_fsid = uuid_to_fsid(es->s_uuid);
spin_unlock(&sbi->s_lock);
return 0;
}
......
......@@ -6153,7 +6153,6 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
ext4_fsblk_t overhead = 0, resv_blocks;
u64 fsid;
s64 bfree;
resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
......@@ -6174,9 +6173,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le32_to_cpu(es->s_inodes_count);
buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
buf->f_namelen = EXT4_NAME_LEN;
fsid = le64_to_cpup((void *)es->s_uuid) ^
le64_to_cpup((void *)es->s_uuid + sizeof(u64));
buf->f_fsid = u64_to_fsid(fsid);
buf->f_fsid = uuid_to_fsid(es->s_uuid);
#ifdef CONFIG_QUOTA
if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
......
This diff is collapsed.
......@@ -3,6 +3,7 @@
#include <linux/path.h>
#include <linux/slab.h>
#include <linux/exportfs.h>
#include <linux/hashtable.h>
extern struct kmem_cache *fanotify_mark_cache;
extern struct kmem_cache *fanotify_fid_event_cachep;
......@@ -115,6 +116,11 @@ static inline void fanotify_info_init(struct fanotify_info *info)
info->name_len = 0;
}
static inline unsigned int fanotify_info_len(struct fanotify_info *info)
{
return info->dir_fh_totlen + info->file_fh_totlen + info->name_len;
}
static inline void fanotify_info_copy_name(struct fanotify_info *info,
const struct qstr *name)
{
......@@ -135,19 +141,31 @@ enum fanotify_event_type {
FANOTIFY_EVENT_TYPE_PATH,
FANOTIFY_EVENT_TYPE_PATH_PERM,
FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
__FANOTIFY_EVENT_TYPE_NUM
};
#define FANOTIFY_EVENT_TYPE_BITS \
(ilog2(__FANOTIFY_EVENT_TYPE_NUM - 1) + 1)
#define FANOTIFY_EVENT_HASH_BITS \
(32 - FANOTIFY_EVENT_TYPE_BITS)
struct fanotify_event {
struct fsnotify_event fse;
struct hlist_node merge_list; /* List for hashed merge */
u32 mask;
enum fanotify_event_type type;
struct {
unsigned int type : FANOTIFY_EVENT_TYPE_BITS;
unsigned int hash : FANOTIFY_EVENT_HASH_BITS;
};
struct pid *pid;
};
static inline void fanotify_init_event(struct fanotify_event *event,
unsigned long id, u32 mask)
unsigned int hash, u32 mask)
{
fsnotify_init_event(&event->fse, id);
fsnotify_init_event(&event->fse);
INIT_HLIST_NODE(&event->merge_list);
event->hash = hash;
event->mask = mask;
event->pid = NULL;
}
......@@ -284,3 +302,25 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event)
else
return NULL;
}
/*
* Use 128 size hash table to speed up events merge.
*/
#define FANOTIFY_HTABLE_BITS (7)
#define FANOTIFY_HTABLE_SIZE (1 << FANOTIFY_HTABLE_BITS)
#define FANOTIFY_HTABLE_MASK (FANOTIFY_HTABLE_SIZE - 1)
/*
* Permission events and overflow event do not get merged - don't hash them.
*/
static inline bool fanotify_is_hashed_event(u32 mask)
{
return !fanotify_is_perm_event(mask) && !(mask & FS_Q_OVERFLOW);
}
static inline unsigned int fanotify_event_hash_bucket(
struct fsnotify_group *group,
struct fanotify_event *event)
{
return event->hash & FANOTIFY_HTABLE_MASK;
}
This diff is collapsed.
......@@ -144,7 +144,8 @@ void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
struct fsnotify_group *group = f->private_data;
seq_printf(m, "fanotify flags:%x event-flags:%x\n",
group->fanotify_data.flags, group->fanotify_data.f_flags);
group->fanotify_data.flags,
group->fanotify_data.f_flags);
show_fdinfo(m, f, fanotify_fdinfo);
}
......
......@@ -122,7 +122,6 @@ static struct fsnotify_group *__fsnotify_alloc_group(
/* set to 0 when there a no external references to this group */
refcount_set(&group->refcnt, 1);
atomic_set(&group->num_marks, 0);
atomic_set(&group->user_waits, 0);
spin_lock_init(&group->notification_lock);
......
......@@ -46,9 +46,10 @@ static bool event_compare(struct fsnotify_event *old_fsn,
return false;
}
static int inotify_merge(struct list_head *list,
struct fsnotify_event *event)
static int inotify_merge(struct fsnotify_group *group,
struct fsnotify_event *event)
{
struct list_head *list = &group->notification_list;
struct fsnotify_event *last_event;
last_event = list_entry(list->prev, struct fsnotify_event, list);
......@@ -107,7 +108,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
mask &= ~IN_ISDIR;
fsn_event = &event->fse;
fsnotify_init_event(fsn_event, 0);
fsnotify_init_event(fsn_event);
event->mask = mask;
event->wd = i_mark->wd;
event->sync_cookie = cookie;
......@@ -115,7 +116,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
if (len)
strcpy(event->name, name->name);
ret = fsnotify_add_event(group, fsn_event, inotify_merge);
ret = fsnotify_add_event(group, fsn_event, inotify_merge, NULL);
if (ret) {
/* Our event wasn't used in the end. Free it. */
fsnotify_destroy_event(group, fsn_event);
......
......@@ -146,10 +146,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
size_t event_size = sizeof(struct inotify_event);
struct fsnotify_event *event;
if (fsnotify_notify_queue_is_empty(group))
return NULL;
event = fsnotify_peek_first_event(group);
if (!event)
return NULL;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
......@@ -642,7 +641,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
return ERR_PTR(-ENOMEM);
}
group->overflow_event = &oevent->fse;
fsnotify_init_event(group->overflow_event, 0);
fsnotify_init_event(group->overflow_event);
oevent->mask = FS_Q_OVERFLOW;
oevent->wd = -1;
oevent->sync_cookie = 0;
......
......@@ -391,8 +391,6 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
list_del_init(&mark->g_list);
spin_unlock(&mark->lock);
atomic_dec(&group->num_marks);
/* Drop mark reference acquired in fsnotify_add_mark_locked() */
fsnotify_put_mark(mark);
}
......@@ -656,7 +654,6 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;
list_add(&mark->g_list, &group->marks_list);
atomic_inc(&group->num_marks);
fsnotify_get_mark(mark); /* for g_list */
spin_unlock(&mark->lock);
......@@ -674,7 +671,6 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
FSNOTIFY_MARK_FLAG_ATTACHED);
list_del_init(&mark->g_list);
spin_unlock(&mark->lock);
atomic_dec(&group->num_marks);
fsnotify_put_mark(mark);
return ret;
......
......@@ -47,13 +47,6 @@ u32 fsnotify_get_cookie(void)
}
EXPORT_SYMBOL_GPL(fsnotify_get_cookie);
/* return true if the notify queue is empty, false otherwise */
bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
{
assert_spin_locked(&group->notification_lock);
return list_empty(&group->notification_list) ? true : false;
}
void fsnotify_destroy_event(struct fsnotify_group *group,
struct fsnotify_event *event)
{
......@@ -75,16 +68,22 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
}
/*
* Add an event to the group notification queue. The group can later pull this
* event off the queue to deal with. The function returns 0 if the event was
* added to the queue, 1 if the event was merged with some other queued event,
* Try to add an event to the notification queue.
* The group can later pull this event off the queue to deal with.
* The group can use the @merge hook to merge the event with a queued event.
* The group can use the @insert hook to insert the event into hash table.
* The function returns:
* 0 if the event was added to a queue
* 1 if the event was merged with some other queued event
* 2 if the event was not queued - either the queue of events has overflown
* or the group is shutting down.
* or the group is shutting down.
*/
int fsnotify_add_event(struct fsnotify_group *group,
struct fsnotify_event *event,
int (*merge)(struct list_head *,
struct fsnotify_event *))
int (*merge)(struct fsnotify_group *,
struct fsnotify_event *),
void (*insert)(struct fsnotify_group *,
struct fsnotify_event *))
{
int ret = 0;
struct list_head *list = &group->notification_list;
......@@ -111,7 +110,7 @@ int fsnotify_add_event(struct fsnotify_group *group,
}
if (!list_empty(list) && merge) {
ret = merge(list, event);
ret = merge(group, event);
if (ret) {
spin_unlock(&group->notification_lock);
return ret;
......@@ -121,6 +120,8 @@ int fsnotify_add_event(struct fsnotify_group *group,
queue:
group->q_len++;
list_add_tail(&event->list, list);
if (insert)
insert(group, event);
spin_unlock(&group->notification_lock);
wake_up(&group->notification_waitq);
......@@ -141,33 +142,36 @@ void fsnotify_remove_queued_event(struct fsnotify_group *group,
}
/*
* Remove and return the first event from the notification list. It is the
* responsibility of the caller to destroy the obtained event
* Return the first event on the notification list without removing it.
* Returns NULL if the list is empty.
*/
struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
{
struct fsnotify_event *event;
assert_spin_locked(&group->notification_lock);
pr_debug("%s: group=%p\n", __func__, group);
if (fsnotify_notify_queue_is_empty(group))
return NULL;
event = list_first_entry(&group->notification_list,
struct fsnotify_event, list);
fsnotify_remove_queued_event(group, event);
return event;
return list_first_entry(&group->notification_list,
struct fsnotify_event, list);
}
/*
* This will not remove the event, that must be done with
* fsnotify_remove_first_event()
* Remove and return the first event from the notification list. It is the
* responsibility of the caller to destroy the obtained event
*/
struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
{
assert_spin_locked(&group->notification_lock);
struct fsnotify_event *event = fsnotify_peek_first_event(group);
return list_first_entry(&group->notification_list,
struct fsnotify_event, list);
if (!event)
return NULL;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
fsnotify_remove_queued_event(group, event);
return event;
}
/*
......
......@@ -1177,7 +1177,6 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct super_block *sb = dentry->d_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
enum zonefs_ztype t;
u64 fsid;
buf->f_type = ZONEFS_MAGIC;
buf->f_bsize = sb->s_blocksize;
......@@ -1200,9 +1199,7 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
spin_unlock(&sbi->s_lock);
fsid = le64_to_cpup((void *)sbi->s_uuid.b) ^
le64_to_cpup((void *)sbi->s_uuid.b + sizeof(u64));
buf->f_fsid = u64_to_fsid(fsid);
buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b);
return 0;
}
......
......@@ -2,8 +2,11 @@
#ifndef _LINUX_FANOTIFY_H
#define _LINUX_FANOTIFY_H
#include <linux/sysctl.h>
#include <uapi/linux/fanotify.h>
extern struct ctl_table fanotify_table[]; /* for sysctl */
#define FAN_GROUP_FLAG(group, flag) \
((group)->fanotify_data.flags & (flag))
......@@ -15,15 +18,38 @@
* these constant, the programs may break if re-compiled with new uapi headers
* and then run on an old kernel.
*/
#define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \
/* Group classes where permission events are allowed */
#define FANOTIFY_PERM_CLASSES (FAN_CLASS_CONTENT | \
FAN_CLASS_PRE_CONTENT)
#define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FANOTIFY_PERM_CLASSES)
#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME)
#define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | FANOTIFY_FID_BITS | \
FAN_REPORT_TID | \
FAN_CLOEXEC | FAN_NONBLOCK | \
FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS)
/*
* fanotify_init() flags that require CAP_SYS_ADMIN.
* We do not allow unprivileged groups to request permission events.
* We do not allow unprivileged groups to get other process pid in events.
* We do not allow unprivileged groups to use unlimited resources.
*/
#define FANOTIFY_ADMIN_INIT_FLAGS (FANOTIFY_PERM_CLASSES | \
FAN_REPORT_TID | \
FAN_UNLIMITED_QUEUE | \
FAN_UNLIMITED_MARKS)
/*
* fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
* FAN_CLASS_NOTIF is the only class we allow for unprivileged group.
* We do not allow unprivileged groups to get file descriptors in events,
* so one of the flags for reporting file handles is required.
*/
#define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \
FANOTIFY_FID_BITS | \
FAN_CLOEXEC | FAN_NONBLOCK)
#define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \
FANOTIFY_USER_INIT_FLAGS)
#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
FAN_MARK_FILESYSTEM)
......
......@@ -167,7 +167,6 @@ struct fsnotify_ops {
*/
struct fsnotify_event {
struct list_head list;
unsigned long objectid; /* identifier for queue merges */
};
/*
......@@ -207,9 +206,6 @@ struct fsnotify_group {
/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
struct mutex mark_mutex; /* protect marks_list */
atomic_t num_marks; /* 1 for each mark and 1 for not being
* past the point of no return when freeing
* a group */
atomic_t user_waits; /* Number of tasks waiting for user
* response */
struct list_head marks_list; /* all inode marks for this group */
......@@ -234,13 +230,14 @@ struct fsnotify_group {
#endif
#ifdef CONFIG_FANOTIFY
struct fanotify_group_private_data {
/* Hash table of events for merge */
struct hlist_head *merge_hash;
/* allows a group to block waiting for a userspace response */
struct list_head access_list;
wait_queue_head_t access_waitq;
int flags; /* flags from fanotify_init() */
int f_flags; /* event_f_flags from fanotify_init() */
unsigned int max_marks;
struct user_struct *user;
struct ucounts *ucounts;
} fanotify_data;
#endif /* CONFIG_FANOTIFY */
};
......@@ -487,15 +484,23 @@ extern void fsnotify_destroy_event(struct fsnotify_group *group,
/* attach the event to the group notification queue */
extern int fsnotify_add_event(struct fsnotify_group *group,
struct fsnotify_event *event,
int (*merge)(struct list_head *,
struct fsnotify_event *));
int (*merge)(struct fsnotify_group *,
struct fsnotify_event *),
void (*insert)(struct fsnotify_group *,
struct fsnotify_event *));
/* Queue overflow event to a notification group */
static inline void fsnotify_queue_overflow(struct fsnotify_group *group)
{
fsnotify_add_event(group, group->overflow_event, NULL);
fsnotify_add_event(group, group->overflow_event, NULL, NULL);
}
static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
{
assert_spin_locked(&group->notification_lock);
return list_empty(&group->notification_list);
}
/* true if the group notification queue is empty */
extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
/* return, but do not dequeue the first event on the notification queue */
extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group);
......@@ -576,11 +581,9 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark);
extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info);
extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info);
static inline void fsnotify_init_event(struct fsnotify_event *event,
unsigned long objectid)
static inline void fsnotify_init_event(struct fsnotify_event *event)
{
INIT_LIST_HEAD(&event->list);
event->objectid = objectid;
}
#else
......
......@@ -14,9 +14,6 @@ struct user_struct {
refcount_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
#ifdef CONFIG_FANOTIFY
atomic_t fanotify_listeners;
#endif
#ifdef CONFIG_EPOLL
atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
#endif
......
......@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <asm/statfs.h>
#include <asm/byteorder.h>
struct kstatfs {
long f_type;
......@@ -50,4 +51,11 @@ static inline __kernel_fsid_t u64_to_fsid(u64 v)
return (__kernel_fsid_t){.val = {(u32)v, (u32)(v>>32)}};
}
/* Fold 16 bytes uuid to 64 bit fsid */
static inline __kernel_fsid_t uuid_to_fsid(__u8 *uuid)
{
return u64_to_fsid(le64_to_cpup((void *)uuid) ^
le64_to_cpup((void *)(uuid + sizeof(u64))));
}
#endif
......@@ -49,6 +49,10 @@ enum ucount_type {
#ifdef CONFIG_INOTIFY_USER
UCOUNT_INOTIFY_INSTANCES,
UCOUNT_INOTIFY_WATCHES,
#endif
#ifdef CONFIG_FANOTIFY
UCOUNT_FANOTIFY_GROUPS,
UCOUNT_FANOTIFY_MARKS,
#endif
UCOUNT_COUNTS,
};
......
......@@ -148,6 +148,9 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
#ifdef CONFIG_INOTIFY_USER
#include <linux/inotify.h>
#endif
#ifdef CONFIG_FANOTIFY
#include <linux/fanotify.h>
#endif
#ifdef CONFIG_PROC_SYSCTL
......@@ -3164,7 +3167,14 @@ static struct ctl_table fs_table[] = {
.mode = 0555,
.child = inotify_table,
},
#endif
#endif
#ifdef CONFIG_FANOTIFY
{
.procname = "fanotify",
.mode = 0555,
.child = fanotify_table,
},
#endif
#ifdef CONFIG_EPOLL
{
.procname = "epoll",
......
......@@ -73,6 +73,10 @@ static struct ctl_table user_table[] = {
#ifdef CONFIG_INOTIFY_USER
UCOUNT_ENTRY("max_inotify_instances"),
UCOUNT_ENTRY("max_inotify_watches"),
#endif
#ifdef CONFIG_FANOTIFY
UCOUNT_ENTRY("max_fanotify_groups"),
UCOUNT_ENTRY("max_fanotify_marks"),
#endif
{ }
};
......
......@@ -2846,6 +2846,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_ffree = sbinfo->free_inodes;
}
/* else leave those fields 0 like simple_statfs */
buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b);
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment