Commit 64bf6ae9 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'v6.5/vfs.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull misc vfs updates from Christian Brauner:
 "Miscellaneous features, cleanups, and fixes for vfs and individual fs

  Features:

   - Use mode 0600 for file created by cachefilesd so it can be run by
     unprivileged users. This aligns them with directories which are
     already created with mode 0700 by cachefilesd

   - Reorder a few members in struct file to prevent some false sharing
     scenarios

   - Indicate that an eventfd is used a semaphore in the eventfd's
     fdinfo procfs file

   - Add a missing uapi header for eventfd exposing relevant uapi
     defines

   - Let the VFS protect transitions of a superblock from read-only to
     read-write in addition to the protection it already provides for
     transitions from read-write to read-only. Protecting read-only to
     read-write transitions allows filesystems such as ext4 to perform
     internal writes, keeping writers away until the transition is
     completed

  Cleanups:

   - Arnd removed the architecture specific arch_report_meminfo()
     prototypes and added a generic one into procfs.h. Note, we got a
     report about a warning in amdpgpu codepaths that suggested this was
     bisectable to this change but we concluded it was a false positive

   - Remove unused parameters from split_fs_names()

   - Rename put_and_unmap_page() to unmap_and_put_page() to let the name
     reflect the order of the cleanup operation that has to unmap before
     the actual put

   - Unexport buffer_check_dirty_writeback() as it is not used outside
     of block device aops

   - Stop allocating aio rings from highmem

   - Protecting read-{only,write} transitions in the VFS used open-coded
     barriers in various places. Replace them with proper little helpers
     and document both the helpers and all barrier interactions involved
     when transitioning between read-{only,write} states

   - Use flexible array members in old readdir codepaths

  Fixes:

   - Use the correct type __poll_t for epoll and eventfd

   - Replace all deprecated strlcpy() invocations, whose return value
     isn't checked with an equivalent strscpy() call

   - Fix some kernel-doc warnings in fs/open.c

   - Reduce the stack usage in jffs2's xattr codepaths finally getting
     rid of this: fs/jffs2/xattr.c:887:1: error: the frame size of 1088
     bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
     royally annoying compilation warning

   - Use __FMODE_NONOTIFY instead of FMODE_NONOTIFY where an int and not
     fmode_t is required to avoid fmode_t to integer degradation
     warnings

   - Create coredumps with O_WRONLY instead of O_RDWR. There's a long
     explanation in that commit how O_RDWR is actually a bug which we
     found out with the help of Linus and git archeology

   - Fix "no previous prototype" warnings in the pipe codepaths

   - Add overflow calculations for remap_verify_area() as a signed
     addition overflow could be triggered in xfstests

   - Fix a null pointer dereference in sysv

   - Use an unsigned variable for length calculations in jfs avoiding
     compilation warnings with gcc 13

   - Fix a dangling pipe pointer in the watch queue codepath

   - The legacy mount option parser provided as a fallback by the VFS
     for filesystems not yet converted to the new mount api did prefix
     the generated mount option string with a leading ',' causing issues
     for some filesystems

   - Fix a repeated word in a comment in fs.h

   - autofs: Update the ctime when mtime is updated as mandated by
     POSIX"

* tag 'v6.5/vfs.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (27 commits)
  readdir: Replace one-element arrays with flexible-array members
  fs: Provide helpers for manipulating sb->s_readonly_remount
  fs: Protect reconfiguration of sb read-write from racing writes
  eventfd: add a uapi header for eventfd userspace APIs
  autofs: set ctime as well when mtime changes on a dir
  eventfd: show the EFD_SEMAPHORE flag in fdinfo
  fs/aio: Stop allocating aio rings from HIGHMEM
  fs: Fix comment typo
  fs: unexport buffer_check_dirty_writeback
  fs: avoid empty option when generating legacy mount string
  watch_queue: prevent dangling pipe pointer
  fs.h: Optimize file struct to prevent false sharing
  highmem: Rename put_and_unmap_page() to unmap_and_put_page()
  cachefiles: Allow the cache to be non-root
  init: remove unused names parameter in split_fs_names()
  jfs: Use unsigned variable for length calculations
  fs/sysv: Null check to prevent null-ptr-deref bug
  fs: use UB-safe check for signed addition overflow in remap_verify_area
  procfs: consolidate arch_report_meminfo declaration
  fs: pipe: reveal missing function protoypes
  ...
parents 5c1c88cd 2507135e
......@@ -472,9 +472,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
#define pte_same(A,B) (pte_val(A) == pte_val(B))
struct seq_file;
extern void arch_report_meminfo(struct seq_file *m);
#endif /* !__ASSEMBLY__ */
......
......@@ -165,9 +165,6 @@ static inline bool is_ioremap_addr(const void *x)
return addr >= IOREMAP_BASE && addr < IOREMAP_END;
}
struct seq_file;
void arch_report_meminfo(struct seq_file *m);
#endif /* CONFIG_PPC64 */
#endif /* __ASSEMBLY__ */
......
......@@ -42,9 +42,6 @@ static inline void update_page_count(int level, long count)
atomic_long_add(count, &direct_pages_count[level]);
}
struct seq_file;
void arch_report_meminfo(struct seq_file *m);
/*
* The S390 doesn't have any external MMU info: the kernel page
* tables contain all the necessary information.
......
......@@ -4,6 +4,7 @@
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
*/
#include <linux/hugetlb.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
......
......@@ -27,6 +27,7 @@
extern pgd_t early_top_pgt[PTRS_PER_PGD];
bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
struct seq_file;
void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
bool user);
......
......@@ -513,9 +513,6 @@ extern void native_pagetable_init(void);
#define native_pagetable_init paging_init
#endif
struct seq_file;
extern void arch_report_meminfo(struct seq_file *m);
enum pg_level {
PG_LEVEL_NONE,
PG_LEVEL_4K,
......
......@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <linux/pfn.h>
#include <linux/percpu.h>
......
......@@ -530,7 +530,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
for (i = 0; i < nr_pages; i++) {
struct page *page;
page = find_or_create_page(file->f_mapping,
i, GFP_HIGHUSER | __GFP_ZERO);
i, GFP_USER | __GFP_ZERO);
if (!page)
break;
pr_debug("pid(%d) page[%d]->count=%d\n",
......@@ -571,7 +571,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */
ring = kmap_atomic(ctx->ring_pages[0]);
ring = page_address(ctx->ring_pages[0]);
ring->nr = nr_events; /* user copy */
ring->id = ~0U;
ring->head = ring->tail = 0;
......@@ -579,7 +579,6 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ring->compat_features = AIO_RING_COMPAT_FEATURES;
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring);
kunmap_atomic(ring);
flush_dcache_page(ctx->ring_pages[0]);
return 0;
......@@ -682,9 +681,8 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
* we are protected from page migration
* changes ring_pages by ->ring_lock.
*/
ring = kmap_atomic(ctx->ring_pages[0]);
ring = page_address(ctx->ring_pages[0]);
ring->id = ctx->id;
kunmap_atomic(ring);
return 0;
}
......@@ -1025,9 +1023,8 @@ static void user_refill_reqs_available(struct kioctx *ctx)
* against ctx->completed_events below will make sure we do the
* safe/right thing.
*/
ring = kmap_atomic(ctx->ring_pages[0]);
ring = page_address(ctx->ring_pages[0]);
head = ring->head;
kunmap_atomic(ring);
refill_reqs_available(ctx, head, ctx->tail);
}
......@@ -1133,12 +1130,11 @@ static void aio_complete(struct aio_kiocb *iocb)
if (++tail >= ctx->nr_events)
tail = 0;
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
ev_page = page_address(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
*event = iocb->ki_res;
kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
......@@ -1152,10 +1148,9 @@ static void aio_complete(struct aio_kiocb *iocb)
ctx->tail = tail;
ring = kmap_atomic(ctx->ring_pages[0]);
ring = page_address(ctx->ring_pages[0]);
head = ring->head;
ring->tail = tail;
kunmap_atomic(ring);
flush_dcache_page(ctx->ring_pages[0]);
ctx->completed_events++;
......@@ -1215,10 +1210,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
mutex_lock(&ctx->ring_lock);
/* Access to ->ring_pages here is protected by ctx->ring_lock. */
ring = kmap_atomic(ctx->ring_pages[0]);
ring = page_address(ctx->ring_pages[0]);
head = ring->head;
tail = ring->tail;
kunmap_atomic(ring);
/*
* Ensure that once we've read the current tail pointer, that
......@@ -1250,10 +1244,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
avail = min(avail, nr - ret);
avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
ev = kmap(page);
ev = page_address(page);
copy_ret = copy_to_user(event + ret, ev + pos,
sizeof(*ev) * avail);
kunmap(page);
if (unlikely(copy_ret)) {
ret = -EFAULT;
......@@ -1265,9 +1258,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
head %= ctx->nr_events;
}
ring = kmap_atomic(ctx->ring_pages[0]);
ring = page_address(ctx->ring_pages[0]);
ring->head = head;
kunmap_atomic(ring);
flush_dcache_page(ctx->ring_pages[0]);
pr_debug("%li h%u t%u\n", ret, head, tail);
......
......@@ -600,7 +600,7 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap,
p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count++;
dir->i_mtime = current_time(dir);
dir->i_mtime = dir->i_ctime = current_time(dir);
return 0;
}
......@@ -633,7 +633,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
d_inode(dentry)->i_size = 0;
clear_nlink(d_inode(dentry));
dir->i_mtime = current_time(dir);
dir->i_mtime = dir->i_ctime = current_time(dir);
spin_lock(&sbi->lookup_lock);
__autofs_add_expiring(dentry);
......@@ -749,7 +749,7 @@ static int autofs_dir_mkdir(struct mnt_idmap *idmap,
p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count++;
inc_nlink(dir);
dir->i_mtime = current_time(dir);
dir->i_mtime = dir->i_ctime = current_time(dir);
return 0;
}
......
......@@ -111,7 +111,6 @@ void buffer_check_dirty_writeback(struct folio *folio,
bh = bh->b_this_page;
} while (bh != head);
}
EXPORT_SYMBOL(buffer_check_dirty_writeback);
/*
* Block until a buffer comes unlocked. This doesn't stop it
......
......@@ -451,7 +451,8 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
ret = cachefiles_inject_write_error();
if (ret == 0) {
file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath, S_IFREG,
file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath,
S_IFREG | 0600,
O_RDWR | O_LARGEFILE | O_DIRECT,
cache->cache_cred);
ret = PTR_ERR_OR_ZERO(file);
......
......@@ -150,7 +150,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
cd->major = major;
cd->baseminor = baseminor;
cd->minorct = minorct;
strlcpy(cd->name, name, sizeof(cd->name));
strscpy(cd->name, name, sizeof(cd->name));
if (!prev) {
cd->next = curr;
......
......@@ -648,7 +648,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
} else {
struct mnt_idmap *idmap;
struct inode *inode;
int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW |
O_LARGEFILE | O_EXCL;
if (cprm.limit < binfmt->min_coredump)
......
......@@ -7,6 +7,7 @@
#include <linux/slab.h>
#include <linux/prefetch.h>
#include "mount.h"
#include "internal.h"
struct prepend_buffer {
char *buf;
......
......@@ -33,17 +33,17 @@ struct eventfd_ctx {
/*
* Every time that a write(2) is performed on an eventfd, the
* value of the __u64 being written is added to "count" and a
* wakeup is performed on "wqh". A read(2) will return the "count"
* value to userspace, and will reset "count" to zero. The kernel
* side eventfd_signal() also, adds to the "count" counter and
* issue a wakeup.
* wakeup is performed on "wqh". If EFD_SEMAPHORE flag was not
* specified, a read(2) will return the "count" value to userspace,
* and will reset "count" to zero. The kernel side eventfd_signal()
* also, adds to the "count" counter and issue a wakeup.
*/
__u64 count;
unsigned int flags;
int id;
};
__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask)
__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask)
{
unsigned long flags;
......@@ -301,6 +301,8 @@ static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
(unsigned long long)ctx->count);
spin_unlock_irq(&ctx->wqh.lock);
seq_printf(m, "eventfd-id: %d\n", ctx->id);
seq_printf(m, "eventfd-semaphore: %d\n",
!!(ctx->flags & EFD_SEMAPHORE));
}
#endif
......
......@@ -536,7 +536,7 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
#else
static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
unsigned pollflags)
__poll_t pollflags)
{
wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags);
}
......
......@@ -561,7 +561,8 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
return -ENOMEM;
}
ctx->legacy_data[size++] = ',';
if (size)
ctx->legacy_data[size++] = ',';
len = strlen(param->key);
memcpy(ctx->legacy_data + size, param->key, len);
size += len;
......
......@@ -120,6 +120,47 @@ void put_super(struct super_block *sb);
extern bool mount_capable(struct fs_context *);
int sb_init_dio_done_wq(struct super_block *sb);
/*
* Prepare superblock for changing its read-only state (i.e., either remount
* read-write superblock read-only or vice versa). After this function returns
* mnt_is_readonly() will return true for any mount of the superblock if its
* caller is able to observe any changes done by the remount. This holds until
* sb_end_ro_state_change() is called.
*/
static inline void sb_start_ro_state_change(struct super_block *sb)
{
WRITE_ONCE(sb->s_readonly_remount, 1);
/*
* For RO->RW transition, the barrier pairs with the barrier in
* mnt_is_readonly() making sure if mnt_is_readonly() sees SB_RDONLY
* cleared, it will see s_readonly_remount set.
* For RW->RO transition, the barrier pairs with the barrier in
* __mnt_want_write() before the mnt_is_readonly() check. The barrier
* makes sure if __mnt_want_write() sees MNT_WRITE_HOLD already
* cleared, it will see s_readonly_remount set.
*/
smp_wmb();
}
/*
* Ends section changing read-only state of the superblock. After this function
* returns if mnt_is_readonly() returns false, the caller will be able to
* observe all the changes remount did to the superblock.
*/
static inline void sb_end_ro_state_change(struct super_block *sb)
{
/*
* This barrier provides release semantics that pairs with
* the smp_rmb() acquire semantics in mnt_is_readonly().
* This barrier pair ensure that when mnt_is_readonly() sees
* 0 for sb->s_readonly_remount, it will also see all the
* preceding flag changes that were made during the RO state
* change.
*/
smp_wmb();
WRITE_ONCE(sb->s_readonly_remount, 0);
}
/*
* open.c
*/
......
......@@ -211,7 +211,10 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
ic->scan_dents = NULL;
cond_resched();
}
jffs2_build_xattr_subsystem(c);
ret = jffs2_build_xattr_subsystem(c);
if (ret)
goto exit;
c->flags &= ~JFFS2_SB_FLAG_BUILDING;
dbg_fsbuild("FS build complete\n");
......
......@@ -772,10 +772,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
}
#define XREF_TMPHASH_SIZE (128)
void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
{
struct jffs2_xattr_ref *ref, *_ref;
struct jffs2_xattr_ref *xref_tmphash[XREF_TMPHASH_SIZE];
struct jffs2_xattr_ref **xref_tmphash;
struct jffs2_xattr_datum *xd, *_xd;
struct jffs2_inode_cache *ic;
struct jffs2_raw_node_ref *raw;
......@@ -784,9 +784,12 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING));
xref_tmphash = kcalloc(XREF_TMPHASH_SIZE,
sizeof(struct jffs2_xattr_ref *), GFP_KERNEL);
if (!xref_tmphash)
return -ENOMEM;
/* Phase.1 : Merge same xref */
for (i=0; i < XREF_TMPHASH_SIZE; i++)
xref_tmphash[i] = NULL;
for (ref=c->xref_temp; ref; ref=_ref) {
struct jffs2_xattr_ref *tmp;
......@@ -884,6 +887,8 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
"%u of xref (%u dead, %u orphan) found.\n",
xdatum_count, xdatum_unchecked_count, xdatum_orphan_count,
xref_count, xref_dead_count, xref_orphan_count);
kfree(xref_tmphash);
return 0;
}
struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
......
......@@ -71,7 +71,7 @@ static inline int is_xattr_ref_dead(struct jffs2_xattr_ref *ref)
#ifdef CONFIG_JFFS2_FS_XATTR
extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c);
extern void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c);
extern int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c);
extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c);
extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
......@@ -103,7 +103,7 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
#else
#define jffs2_init_xattr_subsystem(c)
#define jffs2_build_xattr_subsystem(c)
#define jffs2_build_xattr_subsystem(c) (0)
#define jffs2_clear_xattr_subsystem(c)
#define jffs2_xattr_do_crccheck_inode(c, ic)
......
......@@ -876,7 +876,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
tid_t tid;
ino_t ino = 0;
struct component_name dname;
int ssize; /* source pathname size */
u32 ssize; /* source pathname size */
struct btstack btstack;
struct inode *ip = d_inode(dentry);
s64 xlen = 0;
......@@ -957,7 +957,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
if (ssize > sizeof (JFS_IP(ip)->i_inline))
JFS_IP(ip)->mode2 &= ~INLINEEA;
jfs_info("jfs_symlink: fast symlink added ssize:%d name:%s ",
jfs_info("jfs_symlink: fast symlink added ssize:%u name:%s ",
ssize, name);
}
/*
......@@ -987,7 +987,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
ip->i_size = ssize - 1;
while (ssize) {
/* This is kind of silly since PATH_MAX == 4K */
int copy_size = min(ssize, PSIZE);
u32 copy_size = min_t(u32, ssize, PSIZE);
mp = get_metapage(ip, xaddr, PSIZE, 1);
......
......@@ -309,9 +309,16 @@ static unsigned int mnt_get_writers(struct mount *mnt)
static int mnt_is_readonly(struct vfsmount *mnt)
{
if (mnt->mnt_sb->s_readonly_remount)
if (READ_ONCE(mnt->mnt_sb->s_readonly_remount))
return 1;
/* Order wrt setting s_flags/s_readonly_remount in do_remount() */
/*
* The barrier pairs with the barrier in sb_start_ro_state_change()
* making sure if we don't see s_readonly_remount set yet, we also will
* not see any superblock / mount flag changes done by remount.
* It also pairs with the barrier in sb_end_ro_state_change()
* assuring that if we see s_readonly_remount already cleared, we will
* see the values of superblock / mount flags updated by remount.
*/
smp_rmb();
return __mnt_is_readonly(mnt);
}
......@@ -364,9 +371,11 @@ int __mnt_want_write(struct vfsmount *m)
}
}
/*
* After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
* be set to match its requirements. So we must not load that until
* MNT_WRITE_HOLD is cleared.
* The barrier pairs with the barrier sb_start_ro_state_change() making
* sure that if we see MNT_WRITE_HOLD cleared, we will also see
* s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in
* mnt_is_readonly() and bail in case we are racing with remount
* read-only.
*/
smp_rmb();
if (mnt_is_readonly(m)) {
......@@ -588,10 +597,8 @@ int sb_prepare_remount_readonly(struct super_block *sb)
if (!err && atomic_long_read(&sb->s_remove_count))
err = -EBUSY;
if (!err) {
sb->s_readonly_remount = 1;
smp_wmb();
}
if (!err)
sb_start_ro_state_change(sb);
list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
......
......@@ -700,10 +700,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
return do_fchmodat(AT_FDCWD, filename, mode);
}
/**
* setattr_vfsuid - check and set ia_fsuid attribute
* @kuid: new inode owner
*
/*
* Check whether @kuid is valid and if so generate and set vfsuid_t in
* ia_vfsuid.
*
......@@ -718,10 +715,7 @@ static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid)
return true;
}
/**
* setattr_vfsgid - check and set ia_fsgid attribute
* @kgid: new inode owner
*
/*
* Check whether @kgid is valid and if so generate and set vfsgid_t in
* ia_vfsgid.
*
......@@ -989,7 +983,6 @@ static int do_dentry_open(struct file *f,
* @file: file pointer
* @dentry: pointer to dentry
* @open: open callback
* @opened: state of open
*
* This can be used to finish opening a file passed to i_op->atomic_open().
*
......@@ -1043,7 +1036,6 @@ EXPORT_SYMBOL(file_path);
* vfs_open - open the file at the given path
* @path: path to open
* @file: newly allocated file with f_flag initialized
* @cred: credentials to use
*/
int vfs_open(const struct path *path, struct file *file)
{
......@@ -1156,7 +1148,7 @@ inline struct open_how build_open_how(int flags, umode_t mode)
inline int build_open_flags(const struct open_how *how, struct open_flags *op)
{
u64 flags = how->flags;
u64 strip = FMODE_NONOTIFY | O_CLOEXEC;
u64 strip = __FMODE_NONOTIFY | O_CLOEXEC;
int lookup_flags = 0;
int acc_mode = ACC_MODE(flags);
......
......@@ -35,7 +35,7 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode)
}
/* No atime modification nor notify on underlying */
#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
#define OVL_OPEN_FLAGS (O_NOATIME | __FMODE_NONOTIFY)
static struct file *ovl_open_realfile(const struct file *file,
const struct path *realpath)
......
......@@ -131,7 +131,7 @@ struct old_linux_dirent {
unsigned long d_ino;
unsigned long d_offset;
unsigned short d_namlen;
char d_name[1];
char d_name[];
};
struct readdir_callback {
......@@ -208,7 +208,7 @@ struct linux_dirent {
unsigned long d_ino;
unsigned long d_off;
unsigned short d_reclen;
char d_name[1];
char d_name[];
};
struct getdents_callback {
......@@ -388,7 +388,7 @@ struct compat_old_linux_dirent {
compat_ulong_t d_ino;
compat_ulong_t d_offset;
unsigned short d_namlen;
char d_name[1];
char d_name[];
};
struct compat_readdir_callback {
......@@ -460,7 +460,7 @@ struct compat_linux_dirent {
compat_ulong_t d_ino;
compat_ulong_t d_off;
unsigned short d_reclen;
char d_name[1];
char d_name[];
};
struct compat_getdents_callback {
......
......@@ -15,6 +15,7 @@
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/dax.h>
#include <linux/overflow.h>
#include "internal.h"
#include <linux/uaccess.h>
......@@ -101,10 +102,12 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
bool write)
{
loff_t tmp;
if (unlikely(pos < 0 || len < 0))
return -EINVAL;
if (unlikely((loff_t) (pos + len) < 0))
if (unlikely(check_add_overflow(pos, len, &tmp)))
return -EINVAL;
return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
......
......@@ -595,7 +595,7 @@ struct super_block *sget_fc(struct fs_context *fc,
fc->s_fs_info = NULL;
s->s_type = fc->fs_type;
s->s_iflags |= fc->s_iflags;
strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id));
strscpy(s->s_id, s->s_type->name, sizeof(s->s_id));
list_add_tail(&s->s_list, &super_blocks);
hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
spin_unlock(&sb_lock);
......@@ -674,7 +674,7 @@ struct super_block *sget(struct file_system_type *type,
return ERR_PTR(err);
}
s->s_type = type;
strlcpy(s->s_id, type->name, sizeof(s->s_id));
strscpy(s->s_id, type->name, sizeof(s->s_id));
list_add_tail(&s->s_list, &super_blocks);
hlist_add_head(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock);
......@@ -903,6 +903,7 @@ int reconfigure_super(struct fs_context *fc)
struct super_block *sb = fc->root->d_sb;
int retval;
bool remount_ro = false;
bool remount_rw = false;
bool force = fc->sb_flags & SB_FORCE;
if (fc->sb_flags_mask & ~MS_RMT_MASK)
......@@ -920,7 +921,7 @@ int reconfigure_super(struct fs_context *fc)
bdev_read_only(sb->s_bdev))
return -EACCES;
#endif
remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb);
remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
}
......@@ -943,13 +944,18 @@ int reconfigure_super(struct fs_context *fc)
*/
if (remount_ro) {
if (force) {
sb->s_readonly_remount = 1;
smp_wmb();
sb_start_ro_state_change(sb);
} else {
retval = sb_prepare_remount_readonly(sb);
if (retval)
return retval;
}
} else if (remount_rw) {
/*
* Protect filesystem's reconfigure code from writes from
* userspace until reconfigure finishes.
*/
sb_start_ro_state_change(sb);
}
if (fc->ops->reconfigure) {
......@@ -965,9 +971,7 @@ int reconfigure_super(struct fs_context *fc)
WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) |
(fc->sb_flags & fc->sb_flags_mask)));
/* Needs to be ordered wrt mnt_is_readonly() */
smp_wmb();
sb->s_readonly_remount = 0;
sb_end_ro_state_change(sb);
/*
* Some filesystems modify their metadata via some other path than the
......@@ -982,7 +986,7 @@ int reconfigure_super(struct fs_context *fc)
return 0;
cancel_readonly:
sb->s_readonly_remount = 0;
sb_end_ro_state_change(sb);
return retval;
}
......
......@@ -52,7 +52,7 @@ static int sysv_handle_dirsync(struct inode *dir)
}
/*
* Calls to dir_get_page()/put_and_unmap_page() must be nested according to the
* Calls to dir_get_page()/unmap_and_put_page() must be nested according to the
* rules documented in mm/highmem.rst.
*
* NOTE: sysv_find_entry() and sysv_dotdot() act as calls to dir_get_page()
......@@ -103,11 +103,11 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx)
if (!dir_emit(ctx, name, strnlen(name,SYSV_NAMELEN),
fs16_to_cpu(SYSV_SB(sb), de->inode),
DT_UNKNOWN)) {
put_and_unmap_page(page, kaddr);
unmap_and_put_page(page, kaddr);
return 0;
}
}
put_and_unmap_page(page, kaddr);
unmap_and_put_page(page, kaddr);
}
return 0;
}
......@@ -131,7 +131,7 @@ static inline int namecompare(int len, int maxlen,
* itself (as a parameter - res_dir). It does NOT read the inode of the
* entry - you'll have to do that yourself if you want to.
*
* On Success put_and_unmap_page() should be called on *res_page.
* On Success unmap_and_put_page() should be called on *res_page.
*
* sysv_find_entry() acts as a call to dir_get_page() and must be treated
* accordingly for nesting purposes.
......@@ -166,7 +166,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
name, de->name))
goto found;
}
put_and_unmap_page(page, kaddr);
unmap_and_put_page(page, kaddr);
}
if (++n >= npages)
......@@ -209,7 +209,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
goto out_page;
de++;
}
put_and_unmap_page(page, kaddr);
unmap_and_put_page(page, kaddr);
}
BUG();
return -EINVAL;
......@@ -228,7 +228,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
mark_inode_dirty(dir);
err = sysv_handle_dirsync(dir);
out_page:
put_and_unmap_page(page, kaddr);
unmap_and_put_page(page, kaddr);
return err;
out_unlock:
unlock_page(page);
......@@ -321,12 +321,12 @@ int sysv_empty_dir(struct inode * inode)
if (de->name[1] != '.' || de->name[2])
goto not_empty;
}
put_and_unmap_page(page, kaddr);
unmap_and_put_page(page, kaddr);
}
return 1;
not_empty:
put_and_unmap_page(page, kaddr);
unmap_and_put_page(page, kaddr);
return 0;
}
......@@ -352,7 +352,7 @@ int sysv_set_link(struct sysv_dir_entry *de, struct page *page,
}
/*
* Calls to dir_get_page()/put_and_unmap_page() must be nested according to the
* Calls to dir_get_page()/unmap_and_put_page() must be nested according to the
* rules documented in mm/highmem.rst.
*
* sysv_dotdot() acts as a call to dir_get_page() and must be treated
......@@ -376,7 +376,7 @@ ino_t sysv_inode_by_name(struct dentry *dentry)
if (de) {
res = fs16_to_cpu(SYSV_SB(dentry->d_sb), de->inode);
put_and_unmap_page(page, de);
unmap_and_put_page(page, de);
}
return res;
}
......@@ -145,6 +145,10 @@ static int alloc_branch(struct inode *inode,
*/
parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key);
bh = sb_getblk(inode->i_sb, parent);
if (!bh) {
sysv_free_block(inode->i_sb, branch[n].key);
break;
}
lock_buffer(bh);
memset(bh->b_data, 0, blocksize);
branch[n].bh = bh;
......
......@@ -164,7 +164,7 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry)
inode->i_ctime = dir->i_ctime;
inode_dec_link_count(inode);
}
put_and_unmap_page(page, de);
unmap_and_put_page(page, de);
return err;
}
......@@ -227,7 +227,7 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (!new_de)
goto out_dir;
err = sysv_set_link(new_de, new_page, old_inode);
put_and_unmap_page(new_page, new_de);
unmap_and_put_page(new_page, new_de);
if (err)
goto out_dir;
new_inode->i_ctime = current_time(new_inode);
......@@ -256,9 +256,9 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
out_dir:
if (dir_de)
put_and_unmap_page(dir_page, dir_de);
unmap_and_put_page(dir_page, dir_de);
out_old:
put_and_unmap_page(old_page, old_de);
unmap_and_put_page(old_page, old_de);
out:
return err;
}
......
......@@ -9,12 +9,12 @@
#ifndef _LINUX_EVENTFD_H
#define _LINUX_EVENTFD_H
#include <linux/fcntl.h>
#include <linux/wait.h>
#include <linux/err.h>
#include <linux/percpu-defs.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <uapi/linux/eventfd.h>
/*
* CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
......@@ -23,10 +23,6 @@
* from eventfd, in order to leave a free define-space for
* shared O_* flags.
*/
#define EFD_SEMAPHORE (1 << 0)
#define EFD_CLOEXEC O_CLOEXEC
#define EFD_NONBLOCK O_NONBLOCK
#define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
#define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE)
......@@ -40,7 +36,7 @@ struct file *eventfd_fget(int fd);
struct eventfd_ctx *eventfd_ctx_fdget(int fd);
struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask);
__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask);
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
__u64 *cnt);
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
......
......@@ -956,29 +956,35 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
index < ra->start + ra->size);
}
/*
* f_{lock,count,pos_lock} members can be highly contended and share
* the same cacheline. f_{lock,mode} are very frequently used together
* and so share the same cacheline as well. The read-mostly
* f_{path,inode,op} are kept on a separate cacheline.
*/
struct file {
union {
struct llist_node f_llist;
struct rcu_head f_rcuhead;
unsigned int f_iocb_flags;
};
struct path f_path;
struct inode *f_inode; /* cached value */
const struct file_operations *f_op;
/*
* Protects f_ep, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
atomic_long_t f_count;
struct mutex f_pos_lock;
loff_t f_pos;
unsigned int f_flags;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
struct path f_path;
struct inode *f_inode; /* cached value */
const struct file_operations *f_op;
u64 f_version;
#ifdef CONFIG_SECURITY
......@@ -1242,7 +1248,7 @@ struct super_block {
*/
atomic_long_t s_fsnotify_connectors;
/* Being remounted read-only */
/* Read-only state of the superblock is being changed */
int s_readonly_remount;
/* per-sb errseq_t for reporting writeback errors via syncfs */
......@@ -2669,7 +2675,7 @@ extern void evict_inodes(struct super_block *sb);
void dump_mapping(const struct address_space *);
/*
* Userspace may rely on the the inode number being non-zero. For example, glibc
* Userspace may rely on the inode number being non-zero. For example, glibc
* simply ignores files with zero i_ino in unlink() and other places.
*
* As an additional complication, if userspace was compiled with
......
......@@ -507,7 +507,7 @@ static inline void folio_zero_range(struct folio *folio,
zero_user_segments(&folio->page, start, start + length, 0, 0);
}
static inline void put_and_unmap_page(struct page *page, void *addr)
static inline void unmap_and_put_page(struct page *page, void *addr)
{
kunmap_local(addr);
put_page(page);
......
......@@ -261,18 +261,14 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
extern const struct pipe_buf_operations nosteal_pipe_buf_ops;
#ifdef CONFIG_WATCH_QUEUE
unsigned long account_pipe_buffers(struct user_struct *user,
unsigned long old, unsigned long new);
bool too_many_pipe_buffers_soft(unsigned long user_bufs);
bool too_many_pipe_buffers_hard(unsigned long user_bufs);
bool pipe_is_unprivileged_user(void);
#endif
/* for F_SETPIPE_SZ and F_GETPIPE_SZ */
#ifdef CONFIG_WATCH_QUEUE
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots);
#endif
long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice);
......
......@@ -158,6 +158,8 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task);
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
void arch_report_meminfo(struct seq_file *m);
#else /* CONFIG_PROC_FS */
static inline void proc_root_init(void)
......
......@@ -38,7 +38,7 @@ struct watch_filter {
struct watch_queue {
struct rcu_head rcu;
struct watch_filter __rcu *filter;
struct pipe_inode_info *pipe; /* The pipe we're using as a buffer */
struct pipe_inode_info *pipe; /* Pipe we use as a buffer, NULL if queue closed */
struct hlist_head watches; /* Contributory watches */
struct page **notes; /* Preallocated notifications */
unsigned long *notes_bitmap; /* Allocation bitmap for notes */
......@@ -46,7 +46,6 @@ struct watch_queue {
spinlock_t lock;
unsigned int nr_notes; /* Number of notes */
unsigned int nr_pages; /* Number of pages in notes[] */
bool defunct; /* T when queues closed */
};
/*
......
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_LINUX_EVENTFD_H
#define _UAPI_LINUX_EVENTFD_H
#include <linux/fcntl.h>
#define EFD_SEMAPHORE (1 << 0)
#define EFD_CLOEXEC O_CLOEXEC
#define EFD_NONBLOCK O_NONBLOCK
#endif /* _UAPI_LINUX_EVENTFD_H */
......@@ -338,7 +338,7 @@ __setup("rootfstype=", fs_names_setup);
__setup("rootdelay=", root_delay_setup);
/* This can return zero length strings. Caller should check */
static int __init split_fs_names(char *page, size_t size, char *names)
static int __init split_fs_names(char *page, size_t size)
{
int count = 1;
char *p = page;
......@@ -402,7 +402,7 @@ void __init mount_block_root(char *name, int flags)
scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
if (root_fs_names)
num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names);
num_fs = split_fs_names(fs_names, PAGE_SIZE);
else
num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
retry:
......@@ -545,7 +545,7 @@ static int __init mount_nodev_root(void)
fs_names = (void *)__get_free_page(GFP_KERNEL);
if (!fs_names)
return -EINVAL;
num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names);
num_fs = split_fs_names(fs_names, PAGE_SIZE);
for (i = 0, fstype = fs_names; i < num_fs;
i++, fstype += strlen(fstype) + 1) {
......
......@@ -42,7 +42,7 @@ MODULE_AUTHOR("Red Hat, Inc.");
static inline bool lock_wqueue(struct watch_queue *wqueue)
{
spin_lock_bh(&wqueue->lock);
if (unlikely(wqueue->defunct)) {
if (unlikely(!wqueue->pipe)) {
spin_unlock_bh(&wqueue->lock);
return false;
}
......@@ -104,9 +104,6 @@ static bool post_one_notification(struct watch_queue *wqueue,
unsigned int head, tail, mask, note, offset, len;
bool done = false;
if (!pipe)
return false;
spin_lock_irq(&pipe->rd_wait.lock);
mask = pipe->ring_size - 1;
......@@ -603,8 +600,11 @@ void watch_queue_clear(struct watch_queue *wqueue)
rcu_read_lock();
spin_lock_bh(&wqueue->lock);
/* Prevent new notifications from being stored. */
wqueue->defunct = true;
/*
* This pipe can be freed by callers like free_pipe_info().
* Removing this reference also prevents new notifications.
*/
wqueue->pipe = NULL;
while (!hlist_empty(&wqueue->watches)) {
watch = hlist_entry(wqueue->watches.first, struct watch, queue_node);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment