Commit 7166c326 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'vfs-6.12-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:
 "afs:
   - Fix a lock recursion in afs_wake_up_async_call() on ->notify_lock

 netfs:
   - Drop the references to a folio immediately after the folio has been
     extracted to prevent races with future I/O collection

   - Fix a documentation build error

   - Downgrade the i_rwsem for buffered writes to fix a cifs reported
     performance regression when switching to netfslib

  vfs:
   - Explicitly return -E2BIG from openat2() if the specified size is
     unexpectedly large. This aligns openat2() with other extensible
     struct based system calls

   - When copying a mount namespace ensure that we only try to remove
     the new copy from the mount namespace rbtree if it has already been
     added to it

  nilfs:
   - Clear the buffer delay flag when clearing the buffer state flags
     when a buffer head is discarded to prevent a kernel OOPs

  ocfs2:
   - Fix an uninitialized value warning in ocfs2_setattr()

  proc:
   - Fix a kernel doc warning"

* tag 'vfs-6.12-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  proc: Fix W=1 build kernel-doc warning
  afs: Fix lock recursion
  fs: Fix uninitialized value issue in from_kuid and from_kgid
  fs: don't try and remove empty rbtree node
  netfs: Downgrade i_rwsem for a buffered write
  nilfs2: fix kernel bug due to missing clearing of buffer delay flag
  openat2: explicitly return -E2BIG for (usize > PAGE_SIZE)
  netfs: fix documentation build error
  netfs: In readahead, put the folio refs as soon extracted
parents a777c32c 197231da
...@@ -592,4 +592,3 @@ API Function Reference ...@@ -592,4 +592,3 @@ API Function Reference
.. kernel-doc:: include/linux/netfs.h .. kernel-doc:: include/linux/netfs.h
.. kernel-doc:: fs/netfs/buffered_read.c .. kernel-doc:: fs/netfs/buffered_read.c
.. kernel-doc:: fs/netfs/io.c
...@@ -130,6 +130,7 @@ struct afs_call { ...@@ -130,6 +130,7 @@ struct afs_call {
wait_queue_head_t waitq; /* processes awaiting completion */ wait_queue_head_t waitq; /* processes awaiting completion */
struct work_struct async_work; /* async I/O processor */ struct work_struct async_work; /* async I/O processor */
struct work_struct work; /* actual work processor */ struct work_struct work; /* actual work processor */
struct work_struct free_work; /* Deferred free processor */
struct rxrpc_call *rxcall; /* RxRPC call handle */ struct rxrpc_call *rxcall; /* RxRPC call handle */
struct rxrpc_peer *peer; /* Remote endpoint */ struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* security for this call */ struct key *key; /* security for this call */
...@@ -1331,6 +1332,7 @@ extern int __net_init afs_open_socket(struct afs_net *); ...@@ -1331,6 +1332,7 @@ extern int __net_init afs_open_socket(struct afs_net *);
extern void __net_exit afs_close_socket(struct afs_net *); extern void __net_exit afs_close_socket(struct afs_net *);
extern void afs_charge_preallocation(struct work_struct *); extern void afs_charge_preallocation(struct work_struct *);
extern void afs_put_call(struct afs_call *); extern void afs_put_call(struct afs_call *);
void afs_deferred_put_call(struct afs_call *call);
void afs_make_call(struct afs_call *call, gfp_t gfp); void afs_make_call(struct afs_call *call, gfp_t gfp);
void afs_wait_for_call_to_complete(struct afs_call *call); void afs_wait_for_call_to_complete(struct afs_call *call);
extern struct afs_call *afs_alloc_flat_call(struct afs_net *, extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
struct workqueue_struct *afs_async_calls; struct workqueue_struct *afs_async_calls;
static void afs_deferred_free_worker(struct work_struct *work);
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_process_async_call(struct work_struct *); static void afs_process_async_call(struct work_struct *);
...@@ -149,6 +150,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net, ...@@ -149,6 +150,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
call->debug_id = atomic_inc_return(&rxrpc_debug_id); call->debug_id = atomic_inc_return(&rxrpc_debug_id);
refcount_set(&call->ref, 1); refcount_set(&call->ref, 1);
INIT_WORK(&call->async_work, afs_process_async_call); INIT_WORK(&call->async_work, afs_process_async_call);
INIT_WORK(&call->free_work, afs_deferred_free_worker);
init_waitqueue_head(&call->waitq); init_waitqueue_head(&call->waitq);
spin_lock_init(&call->state_lock); spin_lock_init(&call->state_lock);
call->iter = &call->def_iter; call->iter = &call->def_iter;
...@@ -159,6 +161,36 @@ static struct afs_call *afs_alloc_call(struct afs_net *net, ...@@ -159,6 +161,36 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
return call; return call;
} }
static void afs_free_call(struct afs_call *call)
{
struct afs_net *net = call->net;
int o;
ASSERT(!work_pending(&call->async_work));
rxrpc_kernel_put_peer(call->peer);
if (call->rxcall) {
rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
rxrpc_kernel_put_call(net->socket, call->rxcall);
call->rxcall = NULL;
}
if (call->type->destructor)
call->type->destructor(call);
afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call);
kfree(call->request);
o = atomic_read(&net->nr_outstanding_calls);
trace_afs_call(call->debug_id, afs_call_trace_free, 0, o,
__builtin_return_address(0));
kfree(call);
o = atomic_dec_return(&net->nr_outstanding_calls);
if (o == 0)
wake_up_var(&net->nr_outstanding_calls);
}
/* /*
* Dispose of a reference on a call. * Dispose of a reference on a call.
*/ */
...@@ -173,32 +205,34 @@ void afs_put_call(struct afs_call *call) ...@@ -173,32 +205,34 @@ void afs_put_call(struct afs_call *call)
o = atomic_read(&net->nr_outstanding_calls); o = atomic_read(&net->nr_outstanding_calls);
trace_afs_call(debug_id, afs_call_trace_put, r - 1, o, trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
__builtin_return_address(0)); __builtin_return_address(0));
if (zero)
afs_free_call(call);
}
if (zero) { static void afs_deferred_free_worker(struct work_struct *work)
ASSERT(!work_pending(&call->async_work)); {
ASSERT(call->type->name != NULL); struct afs_call *call = container_of(work, struct afs_call, free_work);
rxrpc_kernel_put_peer(call->peer);
if (call->rxcall) {
rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
rxrpc_kernel_put_call(net->socket, call->rxcall);
call->rxcall = NULL;
}
if (call->type->destructor)
call->type->destructor(call);
afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call); afs_free_call(call);
kfree(call->request); }
trace_afs_call(call->debug_id, afs_call_trace_free, 0, o, /*
__builtin_return_address(0)); * Dispose of a reference on a call, deferring the cleanup to a workqueue
kfree(call); * to avoid lock recursion.
*/
void afs_deferred_put_call(struct afs_call *call)
{
struct afs_net *net = call->net;
unsigned int debug_id = call->debug_id;
bool zero;
int r, o;
o = atomic_dec_return(&net->nr_outstanding_calls); zero = __refcount_dec_and_test(&call->ref, &r);
if (o == 0) o = atomic_read(&net->nr_outstanding_calls);
wake_up_var(&net->nr_outstanding_calls); trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
} __builtin_return_address(0));
if (zero)
schedule_work(&call->free_work);
} }
static struct afs_call *afs_get_call(struct afs_call *call, static struct afs_call *afs_get_call(struct afs_call *call,
...@@ -640,7 +674,8 @@ static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall, ...@@ -640,7 +674,8 @@ static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall,
} }
/* /*
* wake up an asynchronous call * Wake up an asynchronous call. The caller is holding the call notify
* spinlock around this, so we can't call afs_put_call().
*/ */
static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
unsigned long call_user_ID) unsigned long call_user_ID)
...@@ -657,7 +692,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, ...@@ -657,7 +692,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
__builtin_return_address(0)); __builtin_return_address(0));
if (!queue_work(afs_async_calls, &call->async_work)) if (!queue_work(afs_async_calls, &call->async_work))
afs_put_call(call); afs_deferred_put_call(call);
} }
} }
......
...@@ -3944,7 +3944,9 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, ...@@ -3944,7 +3944,9 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
new = copy_tree(old, old->mnt.mnt_root, copy_flags); new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) { if (IS_ERR(new)) {
namespace_unlock(); namespace_unlock();
free_mnt_ns(new_ns); ns_free_inum(&new_ns->ns);
dec_mnt_namespaces(new_ns->ucounts);
mnt_ns_release(new_ns);
return ERR_CAST(new); return ERR_CAST(new);
} }
if (user_ns != ns->user_ns) { if (user_ns != ns->user_ns) {
......
...@@ -67,7 +67,8 @@ static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_in ...@@ -67,7 +67,8 @@ static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_in
* Decant the list of folios to read into a rolling buffer. * Decant the list of folios to read into a rolling buffer.
*/ */
static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq, static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
struct folio_queue *folioq) struct folio_queue *folioq,
struct folio_batch *put_batch)
{ {
unsigned int order, nr; unsigned int order, nr;
size_t size = 0; size_t size = 0;
...@@ -82,6 +83,9 @@ static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq, ...@@ -82,6 +83,9 @@ static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
order = folio_order(folio); order = folio_order(folio);
folioq->orders[i] = order; folioq->orders[i] = order;
size += PAGE_SIZE << order; size += PAGE_SIZE << order;
if (!folio_batch_add(put_batch, folio))
folio_batch_release(put_batch);
} }
for (int i = nr; i < folioq_nr_slots(folioq); i++) for (int i = nr; i < folioq_nr_slots(folioq); i++)
...@@ -120,6 +124,9 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq) ...@@ -120,6 +124,9 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
* that we will need to release later - but we don't want to do * that we will need to release later - but we don't want to do
* that until after we've started the I/O. * that until after we've started the I/O.
*/ */
struct folio_batch put_batch;
folio_batch_init(&put_batch);
while (rreq->submitted < subreq->start + rsize) { while (rreq->submitted < subreq->start + rsize) {
struct folio_queue *tail = rreq->buffer_tail, *new; struct folio_queue *tail = rreq->buffer_tail, *new;
size_t added; size_t added;
...@@ -132,10 +139,11 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq) ...@@ -132,10 +139,11 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
new->prev = tail; new->prev = tail;
tail->next = new; tail->next = new;
rreq->buffer_tail = new; rreq->buffer_tail = new;
added = netfs_load_buffer_from_ra(rreq, new); added = netfs_load_buffer_from_ra(rreq, new, &put_batch);
rreq->iter.count += added; rreq->iter.count += added;
rreq->submitted += added; rreq->submitted += added;
} }
folio_batch_release(&put_batch);
} }
subreq->len = rsize; subreq->len = rsize;
...@@ -348,6 +356,7 @@ static int netfs_wait_for_read(struct netfs_io_request *rreq) ...@@ -348,6 +356,7 @@ static int netfs_wait_for_read(struct netfs_io_request *rreq)
static int netfs_prime_buffer(struct netfs_io_request *rreq) static int netfs_prime_buffer(struct netfs_io_request *rreq)
{ {
struct folio_queue *folioq; struct folio_queue *folioq;
struct folio_batch put_batch;
size_t added; size_t added;
folioq = kmalloc(sizeof(*folioq), GFP_KERNEL); folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
...@@ -360,39 +369,14 @@ static int netfs_prime_buffer(struct netfs_io_request *rreq) ...@@ -360,39 +369,14 @@ static int netfs_prime_buffer(struct netfs_io_request *rreq)
rreq->submitted = rreq->start; rreq->submitted = rreq->start;
iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0); iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);
added = netfs_load_buffer_from_ra(rreq, folioq); folio_batch_init(&put_batch);
added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch);
folio_batch_release(&put_batch);
rreq->iter.count += added; rreq->iter.count += added;
rreq->submitted += added; rreq->submitted += added;
return 0; return 0;
} }
/*
* Drop the ref on each folio that we inherited from the VM readahead code. We
* still have the folio locks to pin the page until we complete the I/O.
*
* Note that we can't just release the batch in each queue struct as we use the
* occupancy count in other places.
*/
static void netfs_put_ra_refs(struct folio_queue *folioq)
{
struct folio_batch fbatch;
folio_batch_init(&fbatch);
while (folioq) {
for (unsigned int slot = 0; slot < folioq_count(folioq); slot++) {
struct folio *folio = folioq_folio(folioq, slot);
if (!folio)
continue;
trace_netfs_folio(folio, netfs_folio_trace_read_put);
if (!folio_batch_add(&fbatch, folio))
folio_batch_release(&fbatch);
}
folioq = folioq->next;
}
folio_batch_release(&fbatch);
}
/** /**
* netfs_readahead - Helper to manage a read request * netfs_readahead - Helper to manage a read request
* @ractl: The description of the readahead request * @ractl: The description of the readahead request
...@@ -436,9 +420,6 @@ void netfs_readahead(struct readahead_control *ractl) ...@@ -436,9 +420,6 @@ void netfs_readahead(struct readahead_control *ractl)
goto cleanup_free; goto cleanup_free;
netfs_read_to_pagecache(rreq); netfs_read_to_pagecache(rreq);
/* Release the folio refs whilst we're waiting for the I/O. */
netfs_put_ra_refs(rreq->buffer);
netfs_put_request(rreq, true, netfs_rreq_trace_put_return); netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
return; return;
......
...@@ -109,6 +109,7 @@ int netfs_start_io_write(struct inode *inode) ...@@ -109,6 +109,7 @@ int netfs_start_io_write(struct inode *inode)
up_write(&inode->i_rwsem); up_write(&inode->i_rwsem);
return -ERESTARTSYS; return -ERESTARTSYS;
} }
downgrade_write(&inode->i_rwsem);
return 0; return 0;
} }
EXPORT_SYMBOL(netfs_start_io_write); EXPORT_SYMBOL(netfs_start_io_write);
...@@ -123,7 +124,7 @@ EXPORT_SYMBOL(netfs_start_io_write); ...@@ -123,7 +124,7 @@ EXPORT_SYMBOL(netfs_start_io_write);
void netfs_end_io_write(struct inode *inode) void netfs_end_io_write(struct inode *inode)
__releases(inode->i_rwsem) __releases(inode->i_rwsem)
{ {
up_write(&inode->i_rwsem); up_read(&inode->i_rwsem);
} }
EXPORT_SYMBOL(netfs_end_io_write); EXPORT_SYMBOL(netfs_end_io_write);
......
...@@ -77,6 +77,8 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq, ...@@ -77,6 +77,8 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
folio_unlock(folio); folio_unlock(folio);
} }
} }
folioq_clear(folioq, slot);
} }
/* /*
......
...@@ -77,7 +77,8 @@ void nilfs_forget_buffer(struct buffer_head *bh) ...@@ -77,7 +77,8 @@ void nilfs_forget_buffer(struct buffer_head *bh)
const unsigned long clear_bits = const unsigned long clear_bits =
(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected)); BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) |
BIT(BH_Delay));
lock_buffer(bh); lock_buffer(bh);
set_mask_bits(&bh->b_state, clear_bits, 0); set_mask_bits(&bh->b_state, clear_bits, 0);
...@@ -406,7 +407,8 @@ void nilfs_clear_folio_dirty(struct folio *folio) ...@@ -406,7 +407,8 @@ void nilfs_clear_folio_dirty(struct folio *folio)
const unsigned long clear_bits = const unsigned long clear_bits =
(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected)); BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) |
BIT(BH_Delay));
bh = head; bh = head;
do { do {
......
...@@ -1129,9 +1129,12 @@ int ocfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry, ...@@ -1129,9 +1129,12 @@ int ocfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
trace_ocfs2_setattr(inode, dentry, trace_ocfs2_setattr(inode, dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)OCFS2_I(inode)->ip_blkno,
dentry->d_name.len, dentry->d_name.name, dentry->d_name.len, dentry->d_name.name,
attr->ia_valid, attr->ia_mode, attr->ia_valid,
from_kuid(&init_user_ns, attr->ia_uid), attr->ia_valid & ATTR_MODE ? attr->ia_mode : 0,
from_kgid(&init_user_ns, attr->ia_gid)); attr->ia_valid & ATTR_UID ?
from_kuid(&init_user_ns, attr->ia_uid) : 0,
attr->ia_valid & ATTR_GID ?
from_kgid(&init_user_ns, attr->ia_gid) : 0);
/* ensuring we don't even attempt to truncate a symlink */ /* ensuring we don't even attempt to truncate a symlink */
if (S_ISLNK(inode->i_mode)) if (S_ISLNK(inode->i_mode))
......
...@@ -1457,6 +1457,8 @@ SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename, ...@@ -1457,6 +1457,8 @@ SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
if (unlikely(usize < OPEN_HOW_SIZE_VER0)) if (unlikely(usize < OPEN_HOW_SIZE_VER0))
return -EINVAL; return -EINVAL;
if (unlikely(usize > PAGE_SIZE))
return -E2BIG;
err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize); err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize);
if (err) if (err)
......
...@@ -77,7 +77,7 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file) ...@@ -77,7 +77,7 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file)
return single_open(file, seq_show, inode); return single_open(file, seq_show, inode);
} }
/** /*
* Shared /proc/pid/fdinfo and /proc/pid/fdinfo/fd permission helper to ensure * Shared /proc/pid/fdinfo and /proc/pid/fdinfo/fd permission helper to ensure
* that the current task has PTRACE_MODE_READ in addition to the normal * that the current task has PTRACE_MODE_READ in addition to the normal
* POSIX-like checks. * POSIX-like checks.
......
...@@ -172,7 +172,6 @@ ...@@ -172,7 +172,6 @@
EM(netfs_folio_trace_read, "read") \ EM(netfs_folio_trace_read, "read") \
EM(netfs_folio_trace_read_done, "read-done") \ EM(netfs_folio_trace_read_done, "read-done") \
EM(netfs_folio_trace_read_gaps, "read-gaps") \ EM(netfs_folio_trace_read_gaps, "read-gaps") \
EM(netfs_folio_trace_read_put, "read-put") \
EM(netfs_folio_trace_read_unlock, "read-unlock") \ EM(netfs_folio_trace_read_unlock, "read-unlock") \
EM(netfs_folio_trace_redirtied, "redirtied") \ EM(netfs_folio_trace_redirtied, "redirtied") \
EM(netfs_folio_trace_store, "store") \ EM(netfs_folio_trace_store, "store") \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment