Commit 69456535 authored by Linus Torvalds

Merge tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Support directly accessing the host page cache from virtiofs via DAX.
   This can improve I/O performance for various workloads and reduce the
   memory requirement by eliminating double caching (see the usage sketch
   after this list). Thanks to Vivek Goyal for doing most of the work on
   this.

 - Allow automatic submounting inside virtiofs. This allows unique
   st_dev/st_ino values to be assigned inside the guest to files
   residing on different filesystems on the host. Thanks to Max Reitz
   for the patches.

 - Fix an old use after free bug found by Pradeep P V K.
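
   As a usage sketch (not part of this merge; the tag name "myfs" and the
   mount point are placeholders), a guest kernel built with CONFIG_FUSE_DAX
   can request the new mode with the "dax" mount option:

       # inside the guest, for a virtiofs export shared under the tag "myfs"
       mount -t virtiofs myfs /mnt -o dax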

* tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (25 commits)
  virtiofs: calculate number of scatter-gather elements accurately
  fuse: connection remove fix
  fuse: implement crossmounts
  fuse: Allow fuse_fill_super_common() for submounts
  fuse: split fuse_mount off of fuse_conn
  fuse: drop fuse_conn parameter where possible
  fuse: store fuse_conn in fuse_req
  fuse: add submount support to <uapi/linux/fuse.h>
  fuse: fix page dereference after free
  virtiofs: add logic to free up a memory range
  virtiofs: maintain a list of busy elements
  virtiofs: serialize truncate/punch_hole and dax fault path
  virtiofs: define dax address space operations
  virtiofs: add DAX mmap support
  virtiofs: implement dax read/write operations
  virtiofs: introduce setupmapping/removemapping commands
  virtiofs: implement FUSE_INIT map_alignment field
  virtiofs: keep a list of free dax memory ranges
  virtiofs: add a mount option to enable dax
  virtiofs: set up virtio_fs dax_device
  ...
parents 922a763a 42d3e2d0
--- a/Documentation/filesystems/fuse.rst
+++ b/Documentation/filesystems/fuse.rst
@@ -47,7 +47,7 @@ filesystems. A good example is sshfs: a secure network filesystem
 using the sftp protocol.
 
 The userspace library and utilities are available from the
-`FUSE homepage: <http://fuse.sourceforge.net/>`_
+`FUSE homepage: <https://github.com/libfuse/>`_
 
 Filesystem type
 ===============
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7238,7 +7238,7 @@ FUSE: FILESYSTEM IN USERSPACE
 M:	Miklos Szeredi <miklos@szeredi.hu>
 L:	linux-fsdevel@vger.kernel.org
 S:	Maintained
-W:	http://fuse.sourceforge.net/
+W:	https://github.com/libfuse/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git
 F:	Documentation/filesystems/fuse.rst
 F:	fs/fuse/
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -46,7 +46,8 @@ EXPORT_SYMBOL_GPL(dax_read_unlock);
 int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
 		pgoff_t *pgoff)
 {
-	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
+	sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
+	phys_addr_t phys_off = (start_sect + sector) * 512;
 
 	if (pgoff)
 		*pgoff = PHYS_PFN(phys_off);
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -559,8 +559,11 @@ static void *grab_mapping_entry(struct xa_state *xas,
 }
 
 /**
- * dax_layout_busy_page - find first pinned page in @mapping
+ * dax_layout_busy_page_range - find first pinned page in @mapping
  * @mapping: address space to scan for a page with ref count > 1
+ * @start: Starting offset. Page containing 'start' is included.
+ * @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX,
+ *       pages from 'start' till the end of file are included.
  *
  * DAX requires ZONE_DEVICE mapped pages. These pages are never
  * 'onlined' to the page allocator so they are considered idle when
@@ -573,12 +576,15 @@ static void *grab_mapping_entry(struct xa_state *xas,
  * to be able to run unmap_mapping_range() and subsequently not race
  * mapping_mapped() becoming true.
  */
-struct page *dax_layout_busy_page(struct address_space *mapping)
+struct page *dax_layout_busy_page_range(struct address_space *mapping,
+					loff_t start, loff_t end)
 {
-	XA_STATE(xas, &mapping->i_pages, 0);
 	void *entry;
 	unsigned int scanned = 0;
 	struct page *page = NULL;
+	pgoff_t start_idx = start >> PAGE_SHIFT;
+	pgoff_t end_idx;
+	XA_STATE(xas, &mapping->i_pages, start_idx);
 
 	/*
 	 * In the 'limited' case get_user_pages() for dax is disabled.
@@ -589,6 +595,11 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
 		return NULL;
 
+	/* If end == LLONG_MAX, all pages from start to till end of file */
+	if (end == LLONG_MAX)
+		end_idx = ULONG_MAX;
+	else
+		end_idx = end >> PAGE_SHIFT;
 	/*
 	 * If we race get_user_pages_fast() here either we'll see the
 	 * elevated page count in the iteration and wait, or
@@ -596,15 +607,15 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 	 * against is no longer mapped in the page tables and bail to the
 	 * get_user_pages() slow path.  The slow path is protected by
 	 * pte_lock() and pmd_lock(). New references are not taken without
-	 * holding those locks, and unmap_mapping_range() will not zero the
+	 * holding those locks, and unmap_mapping_pages() will not zero the
 	 * pte or pmd without holding the respective lock, so we are
 	 * guaranteed to either see new references or prevent new
 	 * references from being established.
 	 */
-	unmap_mapping_range(mapping, 0, 0, 0);
+	unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0);
 
 	xas_lock_irq(&xas);
-	xas_for_each(&xas, entry, ULONG_MAX) {
+	xas_for_each(&xas, entry, end_idx) {
 		if (WARN_ON_ONCE(!xa_is_value(entry)))
 			continue;
 		if (unlikely(dax_is_locked(entry)))
@@ -625,6 +636,12 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 	xas_unlock_irq(&xas);
 	return page;
 }
+EXPORT_SYMBOL_GPL(dax_layout_busy_page_range);
+
+struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+	return dax_layout_busy_page_range(mapping, 0, LLONG_MAX);
+}
 EXPORT_SYMBOL_GPL(dax_layout_busy_page);
 
 static int __dax_invalidate_entry(struct address_space *mapping,
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -8,7 +8,7 @@ config FUSE_FS
 
 	  There's also a companion library: libfuse2.  This library is available
 	  from the FUSE homepage:
-	  <http://fuse.sourceforge.net/>
+	  <https://github.com/libfuse/>
 	  although chances are your distribution already has that library
 	  installed if you've installed the "fuse" package itself.
 
@@ -38,3 +38,17 @@ config VIRTIO_FS
 
 	  If you want to share files between guests or with the host, answer Y
 	  or M.
+
+config FUSE_DAX
+	bool "Virtio Filesystem Direct Host Memory Access support"
+	default y
+	select INTERVAL_TREE
+	depends on VIRTIO_FS
+	depends on FS_DAX
+	depends on DAX_DRIVER
+	help
+	  This allows bypassing guest page cache and allows mapping host page
+	  cache directly in guest address space.
+
+	  If you want to allow mounting a Virtio Filesystem with the "dax"
+	  option, answer Y.
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -7,5 +7,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o
 obj-$(CONFIG_CUSE) += cuse.o
 obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
 
-fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
-virtiofs-y += virtio_fs.o
+fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
+fuse-$(CONFIG_FUSE_DAX) += dax.o
+
+virtiofs-y := virtio_fs.o
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -164,6 +164,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
 {
 	unsigned val;
 	struct fuse_conn *fc;
+	struct fuse_mount *fm;
 	ssize_t ret;
 
 	ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
@@ -174,18 +175,27 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
 	if (!fc)
 		goto out;
 
+	down_read(&fc->killsb);
 	spin_lock(&fc->bg_lock);
 	fc->congestion_threshold = val;
-	if (fc->sb) {
+
+	/*
+	 * Get any fuse_mount belonging to this fuse_conn; s_bdi is
+	 * shared between all of them
+	 */
+	if (!list_empty(&fc->mounts)) {
+		fm = list_first_entry(&fc->mounts, struct fuse_mount, fc_entry);
 		if (fc->num_background < fc->congestion_threshold) {
-			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
-			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
+			clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
+			clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
 		} else {
-			set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
-			set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
+			set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
+			set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
 		}
 	}
 	spin_unlock(&fc->bg_lock);
+	up_read(&fc->killsb);
 	fuse_conn_put(fc);
 out:
 	return ret;
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -57,6 +57,7 @@
 struct cuse_conn {
 	struct list_head	list;	/* linked on cuse_conntbl */
+	struct fuse_mount	fm;	/* Dummy mount referencing fc */
 	struct fuse_conn	fc;	/* fuse connection */
 	struct cdev		*cdev;	/* associated character device */
 	struct device		*dev;	/* device representing @cdev */
@@ -134,7 +135,7 @@ static int cuse_open(struct inode *inode, struct file *file)
 	 * Generic permission check is already done against the chrdev
 	 * file, proceed to open.
 	 */
-	rc = fuse_do_open(&cc->fc, 0, file, 0);
+	rc = fuse_do_open(&cc->fm, 0, file, 0);
 	if (rc)
 		fuse_conn_put(&cc->fc);
 	return rc;
@@ -143,10 +144,10 @@ static int cuse_open(struct inode *inode, struct file *file)
 static int cuse_release(struct inode *inode, struct file *file)
 {
 	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = ff->fc;
+	struct fuse_mount *fm = ff->fm;
 
 	fuse_sync_release(NULL, ff, file->f_flags);
-	fuse_conn_put(fc);
+	fuse_conn_put(fm->fc);
 
 	return 0;
 }
@@ -155,7 +156,7 @@ static long cuse_file_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
 	struct fuse_file *ff = file->private_data;
-	struct cuse_conn *cc = fc_to_cc(ff->fc);
+	struct cuse_conn *cc = fc_to_cc(ff->fm->fc);
 	unsigned int flags = 0;
 
 	if (cc->unrestricted_ioctl)
@@ -168,7 +169,7 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
 				   unsigned long arg)
 {
 	struct fuse_file *ff = file->private_data;
-	struct cuse_conn *cc = fc_to_cc(ff->fc);
+	struct cuse_conn *cc = fc_to_cc(ff->fm->fc);
 	unsigned int flags = FUSE_IOCTL_COMPAT;
 
 	if (cc->unrestricted_ioctl)
@@ -313,9 +314,10 @@ struct cuse_init_args {
  * required data structures for it.  Please read the comment at the
  * top of this file for high level overview.
  */
-static void cuse_process_init_reply(struct fuse_conn *fc,
+static void cuse_process_init_reply(struct fuse_mount *fm,
 				    struct fuse_args *args, int error)
 {
+	struct fuse_conn *fc = fm->fc;
 	struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args);
 	struct fuse_args_pages *ap = &ia->ap;
 	struct cuse_conn *cc = fc_to_cc(fc), *pos;
@@ -424,7 +426,7 @@ static int cuse_send_init(struct cuse_conn *cc)
 {
 	int rc;
 	struct page *page;
-	struct fuse_conn *fc = &cc->fc;
+	struct fuse_mount *fm = &cc->fm;
 	struct cuse_init_args *ia;
 	struct fuse_args_pages *ap;
@@ -460,7 +462,7 @@ static int cuse_send_init(struct cuse_conn *cc)
 	ia->desc.length = ap->args.out_args[1].size;
 	ap->args.end = cuse_process_init_reply;
 
-	rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
+	rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
 	if (rc) {
 		kfree(ia);
 err_free_page:
@@ -506,7 +508,8 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
 	 * Limit the cuse channel to requests that can
 	 * be represented in file->f_cred->user_ns.
 	 */
-	fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL);
+	fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns,
		       &fuse_dev_fiq_ops, NULL);
 
 	fud = fuse_dev_alloc_install(&cc->fc);
 	if (!fud) {
// SPDX-License-Identifier: GPL-2.0
/*
* dax: direct host memory access
* Copyright (C) 2020 Red Hat, Inc.
*/
#include "fuse_i.h"
#include <linux/delay.h>
#include <linux/dax.h>
#include <linux/uio.h>
#include <linux/pfn_t.h>
#include <linux/iomap.h>
#include <linux/interval_tree.h>
/*
* Default memory range size. A power of 2 so it agrees with common FUSE_INIT
* map_alignment values 4KB and 64KB.
*/
#define FUSE_DAX_SHIFT 21
#define FUSE_DAX_SZ (1 << FUSE_DAX_SHIFT)
#define FUSE_DAX_PAGES (FUSE_DAX_SZ / PAGE_SIZE)
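/* i.e. each DAX mapping range covers 2 MiB (512 pages with a 4K PAGE_SIZE) */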
/* Number of ranges reclaimer will try to free in one invocation */
#define FUSE_DAX_RECLAIM_CHUNK (10)
/*
 * DAX memory reclaim threshold as a percentage of total ranges. When the
 * number of free ranges drops below this threshold, reclaim can trigger.
 * Default is 20%.
*/
#define FUSE_DAX_RECLAIM_THRESHOLD (20)
/** Translation information for file offsets to DAX window offsets */
struct fuse_dax_mapping {
/* Pointer to inode where this memory range is mapped */
struct inode *inode;
/* Will connect in fcd->free_ranges to keep track of free memory */
struct list_head list;
/* For interval tree in file/inode */
struct interval_tree_node itn;
	/* Will connect in fcd->busy_ranges to keep track of busy memory */
struct list_head busy_list;
/** Position in DAX window */
u64 window_offset;
/** Length of mapping, in bytes */
loff_t length;
/* Is this mapping read-only or read-write */
bool writable;
/* reference count when the mapping is used by dax iomap. */
refcount_t refcnt;
};
/* Per-inode dax map */
struct fuse_inode_dax {
/* Semaphore to protect modifications to the dmap tree */
struct rw_semaphore sem;
/* Sorted rb tree of struct fuse_dax_mapping elements */
struct rb_root_cached tree;
unsigned long nr;
};
struct fuse_conn_dax {
/* DAX device */
struct dax_device *dev;
	/* Lock protecting accesses to members of this structure */
spinlock_t lock;
/* List of memory ranges which are busy */
unsigned long nr_busy_ranges;
struct list_head busy_ranges;
/* Worker to free up memory ranges */
struct delayed_work free_work;
/* Wait queue for a dax range to become free */
wait_queue_head_t range_waitq;
/* DAX Window Free Ranges */
long nr_free_ranges;
struct list_head free_ranges;
unsigned long nr_ranges;
};
static inline struct fuse_dax_mapping *
node_to_dmap(struct interval_tree_node *node)
{
if (!node)
return NULL;
return container_of(node, struct fuse_dax_mapping, itn);
}
static struct fuse_dax_mapping *
alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode);
static void
__kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms)
{
unsigned long free_threshold;
	/* If the number of free ranges is below the threshold, start reclaim */
free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100,
1);
if (fcd->nr_free_ranges < free_threshold)
queue_delayed_work(system_long_wq, &fcd->free_work,
msecs_to_jiffies(delay_ms));
}
static void kick_dmap_free_worker(struct fuse_conn_dax *fcd,
unsigned long delay_ms)
{
spin_lock(&fcd->lock);
__kick_dmap_free_worker(fcd, delay_ms);
spin_unlock(&fcd->lock);
}
static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd)
{
struct fuse_dax_mapping *dmap;
spin_lock(&fcd->lock);
dmap = list_first_entry_or_null(&fcd->free_ranges,
struct fuse_dax_mapping, list);
if (dmap) {
list_del_init(&dmap->list);
WARN_ON(fcd->nr_free_ranges <= 0);
fcd->nr_free_ranges--;
}
spin_unlock(&fcd->lock);
kick_dmap_free_worker(fcd, 0);
return dmap;
}
/* This assumes fcd->lock is held */
static void __dmap_remove_busy_list(struct fuse_conn_dax *fcd,
struct fuse_dax_mapping *dmap)
{
list_del_init(&dmap->busy_list);
WARN_ON(fcd->nr_busy_ranges == 0);
fcd->nr_busy_ranges--;
}
static void dmap_remove_busy_list(struct fuse_conn_dax *fcd,
struct fuse_dax_mapping *dmap)
{
spin_lock(&fcd->lock);
__dmap_remove_busy_list(fcd, dmap);
spin_unlock(&fcd->lock);
}
/* This assumes fcd->lock is held */
static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
struct fuse_dax_mapping *dmap)
{
list_add_tail(&dmap->list, &fcd->free_ranges);
fcd->nr_free_ranges++;
wake_up(&fcd->range_waitq);
}
static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
struct fuse_dax_mapping *dmap)
{
/* Return fuse_dax_mapping to free list */
spin_lock(&fcd->lock);
__dmap_add_to_free_pool(fcd, dmap);
spin_unlock(&fcd->lock);
}
static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx,
struct fuse_dax_mapping *dmap, bool writable,
bool upgrade)
{
struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_conn_dax *fcd = fm->fc->dax;
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_setupmapping_in inarg;
loff_t offset = start_idx << FUSE_DAX_SHIFT;
FUSE_ARGS(args);
ssize_t err;
WARN_ON(fcd->nr_free_ranges < 0);
/* Ask fuse daemon to setup mapping */
memset(&inarg, 0, sizeof(inarg));
inarg.foffset = offset;
inarg.fh = -1;
inarg.moffset = dmap->window_offset;
inarg.len = FUSE_DAX_SZ;
inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
if (writable)
inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
args.opcode = FUSE_SETUPMAPPING;
args.nodeid = fi->nodeid;
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
err = fuse_simple_request(fm, &args);
if (err < 0)
return err;
dmap->writable = writable;
if (!upgrade) {
/*
		 * We don't take a reference on the inode. The inode is valid
		 * right now and when the inode is going away, cleanup logic
		 * should first clean up the dmap entries.
*/
dmap->inode = inode;
dmap->itn.start = dmap->itn.last = start_idx;
/* Protected by fi->dax->sem */
interval_tree_insert(&dmap->itn, &fi->dax->tree);
fi->dax->nr++;
spin_lock(&fcd->lock);
list_add_tail(&dmap->busy_list, &fcd->busy_ranges);
fcd->nr_busy_ranges++;
spin_unlock(&fcd->lock);
}
return 0;
}
static int fuse_send_removemapping(struct inode *inode,
struct fuse_removemapping_in *inargp,
struct fuse_removemapping_one *remove_one)
{
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
args.opcode = FUSE_REMOVEMAPPING;
args.nodeid = fi->nodeid;
args.in_numargs = 2;
args.in_args[0].size = sizeof(*inargp);
args.in_args[0].value = inargp;
args.in_args[1].size = inargp->count * sizeof(*remove_one);
args.in_args[1].value = remove_one;
return fuse_simple_request(fm, &args);
}
static int dmap_removemapping_list(struct inode *inode, unsigned int num,
struct list_head *to_remove)
{
struct fuse_removemapping_one *remove_one, *ptr;
struct fuse_removemapping_in inarg;
struct fuse_dax_mapping *dmap;
int ret, i = 0, nr_alloc;
nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS);
if (!remove_one)
return -ENOMEM;
ptr = remove_one;
list_for_each_entry(dmap, to_remove, list) {
ptr->moffset = dmap->window_offset;
ptr->len = dmap->length;
ptr++;
i++;
num--;
if (i >= nr_alloc || num == 0) {
memset(&inarg, 0, sizeof(inarg));
inarg.count = i;
ret = fuse_send_removemapping(inode, &inarg,
remove_one);
if (ret)
goto out;
ptr = remove_one;
i = 0;
}
}
out:
kfree(remove_one);
return ret;
}
/*
* Cleanup dmap entry and add back to free list. This should be called with
* fcd->lock held.
*/
static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd,
struct fuse_dax_mapping *dmap)
{
pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n",
dmap->itn.start, dmap->itn.last, dmap->window_offset,
dmap->length);
__dmap_remove_busy_list(fcd, dmap);
dmap->inode = NULL;
dmap->itn.start = dmap->itn.last = 0;
__dmap_add_to_free_pool(fcd, dmap);
}
/*
* Free inode dmap entries whose range falls inside [start, end].
* Does not take any locks. At this point of time it should only be
* called from evict_inode() path where we know all dmap entries can be
* reclaimed.
*/
static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd,
struct inode *inode,
loff_t start, loff_t end)
{
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_dax_mapping *dmap, *n;
int err, num = 0;
LIST_HEAD(to_remove);
unsigned long start_idx = start >> FUSE_DAX_SHIFT;
unsigned long end_idx = end >> FUSE_DAX_SHIFT;
struct interval_tree_node *node;
while (1) {
node = interval_tree_iter_first(&fi->dax->tree, start_idx,
end_idx);
if (!node)
break;
dmap = node_to_dmap(node);
/* inode is going away. There should not be any users of dmap */
WARN_ON(refcount_read(&dmap->refcnt) > 1);
interval_tree_remove(&dmap->itn, &fi->dax->tree);
num++;
list_add(&dmap->list, &to_remove);
}
/* Nothing to remove */
if (list_empty(&to_remove))
return;
WARN_ON(fi->dax->nr < num);
fi->dax->nr -= num;
err = dmap_removemapping_list(inode, num, &to_remove);
if (err && err != -ENOTCONN) {
pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n",
start, end);
}
spin_lock(&fcd->lock);
list_for_each_entry_safe(dmap, n, &to_remove, list) {
list_del_init(&dmap->list);
dmap_reinit_add_to_free_pool(fcd, dmap);
}
spin_unlock(&fcd->lock);
}
static int dmap_removemapping_one(struct inode *inode,
struct fuse_dax_mapping *dmap)
{
struct fuse_removemapping_one forget_one;
struct fuse_removemapping_in inarg;
memset(&inarg, 0, sizeof(inarg));
inarg.count = 1;
memset(&forget_one, 0, sizeof(forget_one));
forget_one.moffset = dmap->window_offset;
forget_one.len = dmap->length;
return fuse_send_removemapping(inode, &inarg, &forget_one);
}
/*
 * This is called from evict_inode() and by that time the inode is going away.
 * So this function does not take any locks like fi->dax->sem for traversing
 * the fuse inode interval tree. If that lock were taken, the lock validator
 * would complain of a deadlock situation w.r.t. the fs_reclaim lock.
*/
void fuse_dax_inode_cleanup(struct inode *inode)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
/*
* fuse_evict_inode() has already called truncate_inode_pages_final()
* before we arrive here. So we should not have to worry about any
* pages/exception entries still associated with inode.
*/
inode_reclaim_dmap_range(fc->dax, inode, 0, -1);
WARN_ON(fi->dax->nr);
}
static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length)
{
iomap->addr = IOMAP_NULL_ADDR;
iomap->length = length;
iomap->type = IOMAP_HOLE;
}
static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length,
struct iomap *iomap, struct fuse_dax_mapping *dmap,
unsigned int flags)
{
loff_t offset, len;
loff_t i_size = i_size_read(inode);
offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT);
len = min(length, dmap->length - offset);
/* If length is beyond end of file, truncate further */
if (pos + len > i_size)
len = i_size - pos;
if (len > 0) {
iomap->addr = dmap->window_offset + offset;
iomap->length = len;
if (flags & IOMAP_FAULT)
iomap->length = ALIGN(len, PAGE_SIZE);
iomap->type = IOMAP_MAPPED;
/*
		 * Increase refcnt so that reclaim code knows this dmap is in
		 * use. This assumes the fi->dax->sem semaphore is held either
		 * shared or exclusive.
*/
refcount_inc(&dmap->refcnt);
/* iomap->private should be NULL */
WARN_ON_ONCE(iomap->private);
iomap->private = dmap;
} else {
/* Mapping beyond end of file is hole */
fuse_fill_iomap_hole(iomap, length);
}
}
static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
loff_t length, unsigned int flags,
struct iomap *iomap)
{
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_conn_dax *fcd = fc->dax;
struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
int ret;
bool writable = flags & IOMAP_WRITE;
unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
struct interval_tree_node *node;
/*
* Can't do inline reclaim in fault path. We call
* dax_layout_busy_page() before we free a range. And
	 * fuse_wait_dax_page() drops the fi->i_mmap_sem lock and reacquires it.
* In fault path we enter with fi->i_mmap_sem held and can't drop
* it. Also in fault path we hold fi->i_mmap_sem shared and not
* exclusive, so that creates further issues with fuse_wait_dax_page().
* Hence return -EAGAIN and fuse_dax_fault() will wait for a memory
* range to become free and retry.
*/
if (flags & IOMAP_FAULT) {
alloc_dmap = alloc_dax_mapping(fcd);
if (!alloc_dmap)
return -EAGAIN;
} else {
alloc_dmap = alloc_dax_mapping_reclaim(fcd, inode);
if (IS_ERR(alloc_dmap))
return PTR_ERR(alloc_dmap);
}
/* If we are here, we should have memory allocated */
if (WARN_ON(!alloc_dmap))
return -EIO;
/*
* Take write lock so that only one caller can try to setup mapping
	 * and others wait.
*/
down_write(&fi->dax->sem);
/*
* We dropped lock. Check again if somebody else setup
* mapping already.
*/
node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
if (node) {
dmap = node_to_dmap(node);
fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
dmap_add_to_free_pool(fcd, alloc_dmap);
up_write(&fi->dax->sem);
return 0;
}
/* Setup one mapping */
ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap,
writable, false);
if (ret < 0) {
dmap_add_to_free_pool(fcd, alloc_dmap);
up_write(&fi->dax->sem);
return ret;
}
fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
up_write(&fi->dax->sem);
return 0;
}
static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos,
loff_t length, unsigned int flags,
struct iomap *iomap)
{
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_dax_mapping *dmap;
int ret;
unsigned long idx = pos >> FUSE_DAX_SHIFT;
struct interval_tree_node *node;
/*
* Take exclusive lock so that only one caller can try to setup
* mapping and others wait.
*/
down_write(&fi->dax->sem);
node = interval_tree_iter_first(&fi->dax->tree, idx, idx);
/* We are holding either inode lock or i_mmap_sem, and that should
* ensure that dmap can't be truncated. We are holding a reference
* on dmap and that should make sure it can't be reclaimed. So dmap
* should still be there in tree despite the fact we dropped and
* re-acquired the fi->dax->sem lock.
*/
ret = -EIO;
if (WARN_ON(!node))
goto out_err;
dmap = node_to_dmap(node);
	/* We took an extra reference on dmap to make sure it's not reclaimed.
* Now we hold fi->dax->sem lock and that reference is not needed
* anymore. Drop it.
*/
if (refcount_dec_and_test(&dmap->refcnt)) {
/* refcount should not hit 0. This object only goes
* away when fuse connection goes away
*/
WARN_ON_ONCE(1);
}
/* Maybe another thread already upgraded mapping while we were not
* holding lock.
*/
if (dmap->writable) {
ret = 0;
goto out_fill_iomap;
}
ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true,
true);
if (ret < 0)
goto out_err;
out_fill_iomap:
fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
out_err:
up_write(&fi->dax->sem);
return ret;
}
/* This is just for DAX and the mapping is ephemeral, do not use it for other
* purposes since there is no block device with a permanent mapping.
*/
static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
unsigned int flags, struct iomap *iomap,
struct iomap *srcmap)
{
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_dax_mapping *dmap;
bool writable = flags & IOMAP_WRITE;
unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
struct interval_tree_node *node;
/* We don't support FIEMAP */
if (WARN_ON(flags & IOMAP_REPORT))
return -EIO;
iomap->offset = pos;
iomap->flags = 0;
iomap->bdev = NULL;
iomap->dax_dev = fc->dax->dev;
/*
	 * Both the read/write and mmap paths can race here. So we need
	 * something to make sure that if we are setting up a mapping, the
	 * other path waits.
*
* For now, use a semaphore for this. It probably needs to be
* optimized later.
*/
down_read(&fi->dax->sem);
node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
if (node) {
dmap = node_to_dmap(node);
if (writable && !dmap->writable) {
/* Upgrade read-only mapping to read-write. This will
* require exclusive fi->dax->sem lock as we don't want
			 * two threads to be trying to do this simultaneously
			 * for the same dmap. So drop the shared lock and
			 * acquire the exclusive lock.
			 *
			 * Before dropping the fi->dax->sem lock, take a
			 * reference on dmap so that it's not freed by range
			 * reclaim.
*/
refcount_inc(&dmap->refcnt);
up_read(&fi->dax->sem);
pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n",
__func__, pos, length);
return fuse_upgrade_dax_mapping(inode, pos, length,
flags, iomap);
} else {
fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
up_read(&fi->dax->sem);
return 0;
}
} else {
up_read(&fi->dax->sem);
pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
__func__, pos, length);
if (pos >= i_size_read(inode))
goto iomap_hole;
return fuse_setup_new_dax_mapping(inode, pos, length, flags,
iomap);
}
/*
	 * If a read beyond end of file happens, fs code seems to return
	 * it as a hole
*/
iomap_hole:
fuse_fill_iomap_hole(iomap, length);
pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n",
__func__, pos, length, iomap->length);
return 0;
}
static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length,
ssize_t written, unsigned int flags,
struct iomap *iomap)
{
struct fuse_dax_mapping *dmap = iomap->private;
if (dmap) {
if (refcount_dec_and_test(&dmap->refcnt)) {
/* refcount should not hit 0. This object only goes
* away when fuse connection goes away
*/
WARN_ON_ONCE(1);
}
}
/* DAX writes beyond end-of-file aren't handled using iomap, so the
* file size is unchanged and there is nothing to do here.
*/
return 0;
}
static const struct iomap_ops fuse_iomap_ops = {
.iomap_begin = fuse_iomap_begin,
.iomap_end = fuse_iomap_end,
};
static void fuse_wait_dax_page(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
up_write(&fi->i_mmap_sem);
schedule();
down_write(&fi->i_mmap_sem);
}
/* Should be called with fi->i_mmap_sem lock held exclusively */
static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
loff_t start, loff_t end)
{
struct page *page;
page = dax_layout_busy_page_range(inode->i_mapping, start, end);
if (!page)
return 0;
*retry = true;
return ___wait_var_event(&page->_refcount,
atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
0, 0, fuse_wait_dax_page(inode));
}
/* dmap_end == 0 leads to unmapping of whole file */
int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
u64 dmap_end)
{
bool retry;
int ret;
do {
retry = false;
ret = __fuse_dax_break_layouts(inode, &retry, dmap_start,
dmap_end);
} while (ret == 0 && retry);
return ret;
}
ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
if (iocb->ki_flags & IOCB_NOWAIT) {
if (!inode_trylock_shared(inode))
return -EAGAIN;
} else {
inode_lock_shared(inode);
}
ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops);
inode_unlock_shared(inode);
/* TODO file_accessed(iocb->f_filp) */
return ret;
}
static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
return (iov_iter_rw(from) == WRITE &&
((iocb->ki_pos) >= i_size_read(inode) ||
(iocb->ki_pos + iov_iter_count(from) > i_size_read(inode))));
}
static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
ssize_t ret;
ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
if (ret < 0)
return ret;
fuse_invalidate_attr(inode);
fuse_write_update_size(inode, iocb->ki_pos);
return ret;
}
ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
if (iocb->ki_flags & IOCB_NOWAIT) {
if (!inode_trylock(inode))
return -EAGAIN;
} else {
inode_lock(inode);
}
ret = generic_write_checks(iocb, from);
if (ret <= 0)
goto out;
ret = file_remove_privs(iocb->ki_filp);
if (ret)
goto out;
/* TODO file_update_time() but we don't want metadata I/O */
	/* Do not use DAX for file-extending writes, as the write and the
	 * on-disk i_size increase are not atomic otherwise.
*/
if (file_extending_write(iocb, from))
ret = fuse_dax_direct_write(iocb, from);
else
ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops);
out:
inode_unlock(inode);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
}
static int fuse_dax_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc);
}
static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf,
enum page_entry_size pe_size, bool write)
{
vm_fault_t ret;
struct inode *inode = file_inode(vmf->vma->vm_file);
struct super_block *sb = inode->i_sb;
pfn_t pfn;
int error = 0;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_conn_dax *fcd = fc->dax;
bool retry = false;
if (write)
sb_start_pagefault(sb);
retry:
if (retry && !(fcd->nr_free_ranges > 0))
wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0));
/*
* We need to serialize against not only truncate but also against
* fuse dax memory range reclaim. While a range is being reclaimed,
* we do not want any read/write/mmap to make progress and try
* to populate page cache or access memory we are trying to free.
*/
down_read(&get_fuse_inode(inode)->i_mmap_sem);
ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops);
if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) {
error = 0;
retry = true;
up_read(&get_fuse_inode(inode)->i_mmap_sem);
goto retry;
}
if (ret & VM_FAULT_NEEDDSYNC)
ret = dax_finish_sync_fault(vmf, pe_size, pfn);
up_read(&get_fuse_inode(inode)->i_mmap_sem);
if (write)
sb_end_pagefault(sb);
return ret;
}
static vm_fault_t fuse_dax_fault(struct vm_fault *vmf)
{
return __fuse_dax_fault(vmf, PE_SIZE_PTE,
vmf->flags & FAULT_FLAG_WRITE);
}
static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size)
{
return __fuse_dax_fault(vmf, pe_size, vmf->flags & FAULT_FLAG_WRITE);
}
static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf)
{
return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
}
static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf)
{
return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
}
static const struct vm_operations_struct fuse_dax_vm_ops = {
.fault = fuse_dax_fault,
.huge_fault = fuse_dax_huge_fault,
.page_mkwrite = fuse_dax_page_mkwrite,
.pfn_mkwrite = fuse_dax_pfn_mkwrite,
};
int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma)
{
file_accessed(file);
vma->vm_ops = &fuse_dax_vm_ops;
vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
return 0;
}
static int dmap_writeback_invalidate(struct inode *inode,
struct fuse_dax_mapping *dmap)
{
int ret;
loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT;
loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1);
ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos);
if (ret) {
pr_debug("fuse: filemap_fdatawrite_range() failed. err=%d start_pos=0x%llx, end_pos=0x%llx\n",
ret, start_pos, end_pos);
return ret;
}
ret = invalidate_inode_pages2_range(inode->i_mapping,
start_pos >> PAGE_SHIFT,
end_pos >> PAGE_SHIFT);
if (ret)
pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n",
ret);
return ret;
}
static int reclaim_one_dmap_locked(struct inode *inode,
struct fuse_dax_mapping *dmap)
{
int ret;
struct fuse_inode *fi = get_fuse_inode(inode);
/*
* igrab() was done to make sure inode won't go under us, and this
* further avoids the race with evict().
*/
ret = dmap_writeback_invalidate(inode, dmap);
if (ret)
return ret;
/* Remove dax mapping from inode interval tree now */
interval_tree_remove(&dmap->itn, &fi->dax->tree);
fi->dax->nr--;
/* It is possible that umount/shutdown has killed the fuse connection
* and worker thread is trying to reclaim memory in parallel. Don't
* warn in that case.
*/
ret = dmap_removemapping_one(inode, dmap);
if (ret && ret != -ENOTCONN) {
pr_warn("Failed to remove mapping. offset=0x%llx len=0x%llx ret=%d\n",
dmap->window_offset, dmap->length, ret);
}
return 0;
}
/* Find first mapped dmap for an inode and return file offset. Caller needs
* to hold fi->dax->sem lock either shared or exclusive.
*/
static struct fuse_dax_mapping *inode_lookup_first_dmap(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_dax_mapping *dmap;
struct interval_tree_node *node;
for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node;
node = interval_tree_iter_next(node, 0, -1)) {
dmap = node_to_dmap(node);
/* still in use. */
if (refcount_read(&dmap->refcnt) > 1)
continue;
return dmap;
}
return NULL;
}
/*
* Find first mapping in the tree and free it and return it. Do not add
* it back to free pool.
*/
static struct fuse_dax_mapping *
inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode,
bool *retry)
{
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_dax_mapping *dmap;
u64 dmap_start, dmap_end;
unsigned long start_idx;
int ret;
struct interval_tree_node *node;
down_write(&fi->i_mmap_sem);
/* Lookup a dmap and corresponding file offset to reclaim. */
down_read(&fi->dax->sem);
dmap = inode_lookup_first_dmap(inode);
if (dmap) {
start_idx = dmap->itn.start;
dmap_start = start_idx << FUSE_DAX_SHIFT;
dmap_end = dmap_start + FUSE_DAX_SZ - 1;
}
up_read(&fi->dax->sem);
if (!dmap)
goto out_mmap_sem;
/*
* Make sure there are no references to inode pages using
* get_user_pages()
*/
ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
if (ret) {
pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n",
ret);
dmap = ERR_PTR(ret);
goto out_mmap_sem;
}
down_write(&fi->dax->sem);
node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
/* Range already got reclaimed by somebody else */
if (!node) {
if (retry)
*retry = true;
goto out_write_dmap_sem;
}
dmap = node_to_dmap(node);
/* still in use. */
if (refcount_read(&dmap->refcnt) > 1) {
dmap = NULL;
if (retry)
*retry = true;
goto out_write_dmap_sem;
}
ret = reclaim_one_dmap_locked(inode, dmap);
if (ret < 0) {
dmap = ERR_PTR(ret);
goto out_write_dmap_sem;
}
/* Clean up dmap. Do not add back to free list */
dmap_remove_busy_list(fcd, dmap);
dmap->inode = NULL;
dmap->itn.start = dmap->itn.last = 0;
pr_debug("fuse: %s: inline reclaimed memory range. inode=%p, window_offset=0x%llx, length=0x%llx\n",
__func__, inode, dmap->window_offset, dmap->length);
out_write_dmap_sem:
up_write(&fi->dax->sem);
out_mmap_sem:
up_write(&fi->i_mmap_sem);
return dmap;
}
static struct fuse_dax_mapping *
alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode)
{
struct fuse_dax_mapping *dmap;
struct fuse_inode *fi = get_fuse_inode(inode);
while (1) {
bool retry = false;
dmap = alloc_dax_mapping(fcd);
if (dmap)
return dmap;
dmap = inode_inline_reclaim_one_dmap(fcd, inode, &retry);
/*
* Either we got a mapping or it is an error, return in both
* the cases.
*/
if (dmap)
return dmap;
		/* If we could not reclaim a mapping because it
		 * had a reference or some other temporary failure,
		 * try again. We want to give up inline reclaim only
		 * if there is no range assigned to this node. Otherwise
		 * a deadlock is possible if we sleep with fi->i_mmap_sem
		 * held and the worker freeing memory can't make progress
		 * due to unavailability of the fi->i_mmap_sem lock. So
		 * sleep only if fi->dax->nr == 0.
*/
if (retry)
continue;
/*
* There are no mappings which can be reclaimed. Wait for one.
* We are not holding fi->dax->sem. So it is possible
* that range gets added now. But as we are not holding
* fi->i_mmap_sem, worker should still be able to free up
* a range and wake us up.
*/
if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) {
if (wait_event_killable_exclusive(fcd->range_waitq,
(fcd->nr_free_ranges > 0))) {
return ERR_PTR(-EINTR);
}
}
}
}
static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd,
struct inode *inode,
unsigned long start_idx)
{
int ret;
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_dax_mapping *dmap;
struct interval_tree_node *node;
	/* Find the fuse dax mapping at this file offset in the inode. */
node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
/* Range already got cleaned up by somebody else */
if (!node)
return 0;
dmap = node_to_dmap(node);
/* still in use. */
if (refcount_read(&dmap->refcnt) > 1)
return 0;
ret = reclaim_one_dmap_locked(inode, dmap);
if (ret < 0)
return ret;
/* Cleanup dmap entry and add back to free list */
spin_lock(&fcd->lock);
dmap_reinit_add_to_free_pool(fcd, dmap);
spin_unlock(&fcd->lock);
return ret;
}
/*
* Free a range of memory.
* Locking:
* 1. Take fi->i_mmap_sem to block dax faults.
* 2. Take fi->dax->sem to protect interval tree and also to make sure
* read/write can not reuse a dmap which we might be freeing.
*/
static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd,
struct inode *inode,
unsigned long start_idx,
unsigned long end_idx)
{
int ret;
struct fuse_inode *fi = get_fuse_inode(inode);
loff_t dmap_start = start_idx << FUSE_DAX_SHIFT;
loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1;
down_write(&fi->i_mmap_sem);
ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
if (ret) {
pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n",
ret);
goto out_mmap_sem;
}
down_write(&fi->dax->sem);
ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx);
up_write(&fi->dax->sem);
out_mmap_sem:
up_write(&fi->i_mmap_sem);
return ret;
}
static int try_to_free_dmap_chunks(struct fuse_conn_dax *fcd,
unsigned long nr_to_free)
{
struct fuse_dax_mapping *dmap, *pos, *temp;
int ret, nr_freed = 0;
unsigned long start_idx = 0, end_idx = 0;
struct inode *inode = NULL;
	/* Pick the first busy range and free it for now */
while (1) {
if (nr_freed >= nr_to_free)
break;
dmap = NULL;
spin_lock(&fcd->lock);
if (!fcd->nr_busy_ranges) {
spin_unlock(&fcd->lock);
return 0;
}
list_for_each_entry_safe(pos, temp, &fcd->busy_ranges,
busy_list) {
/* skip this range if it's in use. */
if (refcount_read(&pos->refcnt) > 1)
continue;
inode = igrab(pos->inode);
/*
* This inode is going away. That will free
* up all the ranges anyway, continue to
* next range.
*/
if (!inode)
continue;
/*
			 * Take this element off the list and add it to the
			 * tail. If this element can't be freed, it will help
			 * with selecting a new element in the next iteration
			 * of the loop.
*/
dmap = pos;
list_move_tail(&dmap->busy_list, &fcd->busy_ranges);
start_idx = end_idx = dmap->itn.start;
break;
}
spin_unlock(&fcd->lock);
if (!dmap)
return 0;
ret = lookup_and_reclaim_dmap(fcd, inode, start_idx, end_idx);
iput(inode);
if (ret)
return ret;
nr_freed++;
}
return 0;
}
static void fuse_dax_free_mem_worker(struct work_struct *work)
{
int ret;
struct fuse_conn_dax *fcd = container_of(work, struct fuse_conn_dax,
free_work.work);
ret = try_to_free_dmap_chunks(fcd, FUSE_DAX_RECLAIM_CHUNK);
if (ret) {
pr_debug("fuse: try_to_free_dmap_chunks() failed with err=%d\n",
ret);
}
	/* If the number of free ranges is still below the threshold, requeue */
kick_dmap_free_worker(fcd, 1);
}
static void fuse_free_dax_mem_ranges(struct list_head *mem_list)
{
struct fuse_dax_mapping *range, *temp;
/* Free All allocated elements */
list_for_each_entry_safe(range, temp, mem_list, list) {
list_del(&range->list);
if (!list_empty(&range->busy_list))
list_del(&range->busy_list);
kfree(range);
}
}
void fuse_dax_conn_free(struct fuse_conn *fc)
{
if (fc->dax) {
fuse_free_dax_mem_ranges(&fc->dax->free_ranges);
kfree(fc->dax);
}
}
static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
{
long nr_pages, nr_ranges;
void *kaddr;
pfn_t pfn;
struct fuse_dax_mapping *range;
int ret, id;
size_t dax_size = -1;
unsigned long i;
init_waitqueue_head(&fcd->range_waitq);
INIT_LIST_HEAD(&fcd->free_ranges);
INIT_LIST_HEAD(&fcd->busy_ranges);
INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker);
id = dax_read_lock();
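	/*
	 * dax_size of (size_t)-1 asks dax_direct_access() for as many pages
	 * as the device can map, i.e. the size of the whole DAX window.
	 */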
nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr,
&pfn);
dax_read_unlock(id);
if (nr_pages < 0) {
pr_debug("dax_direct_access() returned %ld\n", nr_pages);
return nr_pages;
}
nr_ranges = nr_pages/FUSE_DAX_PAGES;
pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n",
__func__, nr_pages, nr_ranges);
for (i = 0; i < nr_ranges; i++) {
range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL);
ret = -ENOMEM;
if (!range)
goto out_err;
		/* TODO: This offset only works if the virtio-fs driver is not
		 * hiding some memory at the beginning. This needs better
		 * handling.
*/
range->window_offset = i * FUSE_DAX_SZ;
range->length = FUSE_DAX_SZ;
INIT_LIST_HEAD(&range->busy_list);
refcount_set(&range->refcnt, 1);
list_add_tail(&range->list, &fcd->free_ranges);
}
fcd->nr_free_ranges = nr_ranges;
fcd->nr_ranges = nr_ranges;
return 0;
out_err:
/* Free All allocated elements */
fuse_free_dax_mem_ranges(&fcd->free_ranges);
return ret;
}
int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
{
struct fuse_conn_dax *fcd;
int err;
if (!dax_dev)
return 0;
fcd = kzalloc(sizeof(*fcd), GFP_KERNEL);
if (!fcd)
return -ENOMEM;
spin_lock_init(&fcd->lock);
fcd->dev = dax_dev;
err = fuse_dax_mem_range_init(fcd);
if (err) {
kfree(fcd);
return err;
}
fc->dax = fcd;
return 0;
}
bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
fi->dax = NULL;
if (fc->dax) {
fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT);
if (!fi->dax)
return false;
init_rwsem(&fi->dax->sem);
fi->dax->tree = RB_ROOT_CACHED;
}
return true;
}
static const struct address_space_operations fuse_dax_file_aops = {
.writepages = fuse_dax_writepages,
.direct_IO = noop_direct_IO,
.set_page_dirty = noop_set_page_dirty,
.invalidatepage = noop_invalidatepage,
};
void fuse_dax_inode_init(struct inode *inode)
{
struct fuse_conn *fc = get_fuse_conn(inode);
if (!fc->dax)
return;
inode->i_flags |= S_DAX;
inode->i_data.a_ops = &fuse_dax_file_aops;
}
bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment)
{
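	/*
	 * map_alignment comes from the FUSE_INIT reply and is expressed as
	 * log2; it must not exceed our range size shift (FUSE_DAX_SHIFT).
	 */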
if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) {
pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n",
map_alignment, FUSE_DAX_SZ);
return false;
}
return true;
}
void fuse_dax_cancel_work(struct fuse_conn *fc)
{
struct fuse_conn_dax *fcd = fc->dax;
if (fcd)
cancel_delayed_work_sync(&fcd->free_work);
}
EXPORT_SYMBOL_GPL(fuse_dax_cancel_work);
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -40,20 +40,21 @@ static struct fuse_dev *fuse_get_dev(struct file *file)
 	return READ_ONCE(file->private_data);
 }
 
-static void fuse_request_init(struct fuse_req *req)
+static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
 {
 	INIT_LIST_HEAD(&req->list);
 	INIT_LIST_HEAD(&req->intr_entry);
 	init_waitqueue_head(&req->waitq);
 	refcount_set(&req->count, 1);
 	__set_bit(FR_PENDING, &req->flags);
+	req->fm = fm;
 }
 
-static struct fuse_req *fuse_request_alloc(gfp_t flags)
+static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
 {
 	struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
 
 	if (req)
-		fuse_request_init(req);
+		fuse_request_init(fm, req);
 
 	return req;
 }
@@ -100,10 +101,11 @@ static void fuse_drop_waiting(struct fuse_conn *fc)
 	}
 }
 
-static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
+static void fuse_put_request(struct fuse_req *req);
 
-static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background)
+static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background)
 {
+	struct fuse_conn *fc = fm->fc;
 	struct fuse_req *req;
 	int err;
 	atomic_inc(&fc->num_waiting);
@@ -125,7 +127,7 @@ static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background)
 		if (fc->conn_error)
 			goto out;
 
-	req = fuse_request_alloc(GFP_KERNEL);
+	req = fuse_request_alloc(fm, GFP_KERNEL);
 	err = -ENOMEM;
 	if (!req) {
 		if (for_background)
@@ -143,7 +145,7 @@ static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background)
 	if (unlikely(req->in.h.uid == ((uid_t)-1) ||
 		     req->in.h.gid == ((gid_t)-1))) {
-		fuse_put_request(fc, req);
+		fuse_put_request(req);
 		return ERR_PTR(-EOVERFLOW);
 	}
 	return req;
@@ -153,8 +155,10 @@ static struct fuse_req *fuse_get_req(struct fuse_conn *fc, bool for_background)
 	return ERR_PTR(err);
 }
 
-static void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_put_request(struct fuse_req *req)
 {
+	struct fuse_conn *fc = req->fm->fc;
+
 	if (refcount_dec_and_test(&req->count)) {
 		if (test_bit(FR_BACKGROUND, &req->flags)) {
 			/*
@@ -273,8 +277,10 @@ static void flush_bg_queue(struct fuse_conn *fc)
  * the 'end' callback is called if given, else the reference to the
  * request is released
  */
-void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_request_end(struct fuse_req *req)
 {
+	struct fuse_mount *fm = req->fm;
+	struct fuse_conn *fc = fm->fc;
 	struct fuse_iqueue *fiq = &fc->iq;
 
 	if (test_and_set_bit(FR_FINISHED, &req->flags))
@@ -309,9 +315,9 @@ void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
 			wake_up(&fc->blocked_waitq);
 		}
 
-		if (fc->num_background == fc->congestion_threshold && fc->sb) {
-			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
-			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
+		if (fc->num_background == fc->congestion_threshold && fm->sb) {
+			clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
+			clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
 		}
 		fc->num_background--;
 		fc->active_background--;
@@ -323,14 +329,16 @@ void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
 	}
 
 	if (test_bit(FR_ASYNC, &req->flags))
-		req->args->end(fc, req->args, req->out.h.error);
+		req->args->end(fm, req->args, req->out.h.error);
 put_request:
-	fuse_put_request(fc, req);
+	fuse_put_request(req);
 }
 EXPORT_SYMBOL_GPL(fuse_request_end);
 
-static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
+static int queue_interrupt(struct fuse_req *req)
 {
+	struct fuse_iqueue *fiq = &req->fm->fc->iq;
+
 	spin_lock(&fiq->lock);
 	/* Check for we've sent request to interrupt this req */
 	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
@@ -357,8 +365,9 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
 	return 0;
 }
 
-static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
+static void request_wait_answer(struct fuse_req *req)
 {
+	struct fuse_conn *fc = req->fm->fc;
 	struct fuse_iqueue *fiq = &fc->iq;
 	int err;
 
@@ -373,7 +382,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
 			/* matches barrier in fuse_dev_do_read() */
 			smp_mb__after_atomic();
 			if (test_bit(FR_SENT, &req->flags))
-				queue_interrupt(fiq, req);
+				queue_interrupt(req);
 		}
 
 		if (!test_bit(FR_FORCE, &req->flags)) {
@@ -402,9 +411,9 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
 	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
 }
 
-static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
+static void __fuse_request_send(struct fuse_req *req)
 {
-	struct fuse_iqueue *fiq = &fc->iq;
+	struct fuse_iqueue *fiq = &req->fm->fc->iq;
 
 	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
 	spin_lock(&fiq->lock);
@@ -418,7 +427,7 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 		__fuse_get_request(req);
 		queue_request_and_unlock(fiq, req);
 
-		request_wait_answer(fc, req);
+		request_wait_answer(req);
 		/* Pairs with smp_wmb() in fuse_request_end() */
 		smp_rmb();
 	}
@@ -457,8 +466,10 @@ static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
 	}
 }
 
-static void fuse_force_creds(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_force_creds(struct fuse_req *req)
 {
+	struct fuse_conn *fc = req->fm->fc;
+
 	req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
 	req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
 	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
@@ -473,23 +484,24 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
 		__set_bit(FR_ASYNC, &req->flags);
 }
 
-ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
+ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
 {
+	struct fuse_conn *fc = fm->fc;
 	struct fuse_req *req;
 	ssize_t ret;
 
 	if (args->force) {
 		atomic_inc(&fc->num_waiting);
-		req = fuse_request_alloc(GFP_KERNEL | __GFP_NOFAIL);
+		req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL);
 
 		if (!args->nocreds)
-			fuse_force_creds(fc, req);
+			fuse_force_creds(req);
 
 		__set_bit(FR_WAITING, &req->flags);
 		__set_bit(FR_FORCE, &req->flags);
 	} else {
 		WARN_ON(args->nocreds);
-		req = fuse_get_req(fc, false);
+		req = fuse_get_req(fm, false);
 		if (IS_ERR(req))
 			return PTR_ERR(req);
 	}
@@ -500,20 +512,21 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
 	if (!args->noreply)
 		__set_bit(FR_ISREPLY, &req->flags);
-	__fuse_request_send(fc, req);
+	__fuse_request_send(req);
 	ret = req->out.h.error;
 	if (!ret && args->out_argvar) {
BUG_ON(args->out_numargs == 0); BUG_ON(args->out_numargs == 0);
ret = args->out_args[args->out_numargs - 1].size; ret = args->out_args[args->out_numargs - 1].size;
} }
fuse_put_request(fc, req); fuse_put_request(req);
return ret; return ret;
} }
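For reference, the central API change in this series is that synchronous requests are now issued against a struct fuse_mount rather than a struct fuse_conn. Below is a condensed caller sketch, modeled on fuse_do_getattr() further down in this diff; the function name fuse_example_getattr() is made up for illustration, the rest is the interface as it appears here:

static int fuse_example_getattr(struct inode *inode)
{
	struct fuse_mount *fm = get_fuse_mount(inode);	/* was get_fuse_conn() */
	struct fuse_getattr_in inarg;
	struct fuse_attr_out outarg;
	FUSE_ARGS(args);

	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));

	args.opcode = FUSE_GETATTR;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;

	/* The connection is still reachable as fm->fc where needed. */
	return fuse_simple_request(fm, &args);
}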
static bool fuse_request_queue_background(struct fuse_conn *fc, static bool fuse_request_queue_background(struct fuse_req *req)
struct fuse_req *req)
{ {
struct fuse_mount *fm = req->fm;
struct fuse_conn *fc = fm->fc;
bool queued = false; bool queued = false;
WARN_ON(!test_bit(FR_BACKGROUND, &req->flags)); WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
...@@ -527,9 +540,9 @@ static bool fuse_request_queue_background(struct fuse_conn *fc, ...@@ -527,9 +540,9 @@ static bool fuse_request_queue_background(struct fuse_conn *fc,
fc->num_background++; fc->num_background++;
if (fc->num_background == fc->max_background) if (fc->num_background == fc->max_background)
fc->blocked = 1; fc->blocked = 1;
if (fc->num_background == fc->congestion_threshold && fc->sb) { if (fc->num_background == fc->congestion_threshold && fm->sb) {
set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
} }
list_add_tail(&req->list, &fc->bg_queue); list_add_tail(&req->list, &fc->bg_queue);
flush_bg_queue(fc); flush_bg_queue(fc);
...@@ -540,28 +553,28 @@ static bool fuse_request_queue_background(struct fuse_conn *fc, ...@@ -540,28 +553,28 @@ static bool fuse_request_queue_background(struct fuse_conn *fc,
return queued; return queued;
} }
int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args, int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
gfp_t gfp_flags) gfp_t gfp_flags)
{ {
struct fuse_req *req; struct fuse_req *req;
if (args->force) { if (args->force) {
WARN_ON(!args->nocreds); WARN_ON(!args->nocreds);
req = fuse_request_alloc(gfp_flags); req = fuse_request_alloc(fm, gfp_flags);
if (!req) if (!req)
return -ENOMEM; return -ENOMEM;
__set_bit(FR_BACKGROUND, &req->flags); __set_bit(FR_BACKGROUND, &req->flags);
} else { } else {
WARN_ON(args->nocreds); WARN_ON(args->nocreds);
req = fuse_get_req(fc, true); req = fuse_get_req(fm, true);
if (IS_ERR(req)) if (IS_ERR(req))
return PTR_ERR(req); return PTR_ERR(req);
} }
fuse_args_to_req(req, args); fuse_args_to_req(req, args);
if (!fuse_request_queue_background(fc, req)) { if (!fuse_request_queue_background(req)) {
fuse_put_request(fc, req); fuse_put_request(req);
return -ENOTCONN; return -ENOTCONN;
} }
...@@ -569,14 +582,14 @@ int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args, ...@@ -569,14 +582,14 @@ int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args,
} }
EXPORT_SYMBOL_GPL(fuse_simple_background); EXPORT_SYMBOL_GPL(fuse_simple_background);
static int fuse_simple_notify_reply(struct fuse_conn *fc, static int fuse_simple_notify_reply(struct fuse_mount *fm,
struct fuse_args *args, u64 unique) struct fuse_args *args, u64 unique)
{ {
struct fuse_req *req; struct fuse_req *req;
struct fuse_iqueue *fiq = &fc->iq; struct fuse_iqueue *fiq = &fm->fc->iq;
int err = 0; int err = 0;
req = fuse_get_req(fc, false); req = fuse_get_req(fm, false);
if (IS_ERR(req)) if (IS_ERR(req))
return PTR_ERR(req); return PTR_ERR(req);
...@@ -591,7 +604,7 @@ static int fuse_simple_notify_reply(struct fuse_conn *fc, ...@@ -591,7 +604,7 @@ static int fuse_simple_notify_reply(struct fuse_conn *fc,
} else { } else {
err = -ENODEV; err = -ENODEV;
spin_unlock(&fiq->lock); spin_unlock(&fiq->lock);
fuse_put_request(fc, req); fuse_put_request(req);
} }
return err; return err;
...@@ -785,15 +798,16 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) ...@@ -785,15 +798,16 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
struct page *newpage; struct page *newpage;
struct pipe_buffer *buf = cs->pipebufs; struct pipe_buffer *buf = cs->pipebufs;
get_page(oldpage);
err = unlock_request(cs->req); err = unlock_request(cs->req);
if (err) if (err)
return err; goto out_put_old;
fuse_copy_finish(cs); fuse_copy_finish(cs);
err = pipe_buf_confirm(cs->pipe, buf); err = pipe_buf_confirm(cs->pipe, buf);
if (err) if (err)
return err; goto out_put_old;
BUG_ON(!cs->nr_segs); BUG_ON(!cs->nr_segs);
cs->currbuf = buf; cs->currbuf = buf;
...@@ -833,7 +847,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) ...@@ -833,7 +847,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL); err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
if (err) { if (err) {
unlock_page(newpage); unlock_page(newpage);
return err; goto out_put_old;
} }
get_page(newpage); get_page(newpage);
...@@ -852,14 +866,19 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) ...@@ -852,14 +866,19 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (err) { if (err) {
unlock_page(newpage); unlock_page(newpage);
put_page(newpage); put_page(newpage);
return err; goto out_put_old;
} }
unlock_page(oldpage); unlock_page(oldpage);
/* Drop ref for ap->pages[] array */
put_page(oldpage); put_page(oldpage);
cs->len = 0; cs->len = 0;
return 0; err = 0;
out_put_old:
/* Drop ref obtained in this function */
put_page(oldpage);
return err;
out_fallback_unlock: out_fallback_unlock:
unlock_page(newpage); unlock_page(newpage);
...@@ -868,10 +887,10 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) ...@@ -868,10 +887,10 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
cs->offset = buf->offset; cs->offset = buf->offset;
err = lock_request(cs->req); err = lock_request(cs->req);
if (err) if (!err)
return err; err = 1;
return 1; goto out_put_old;
} }
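The use-after-free fix in fuse_try_move_page() above comes down to one rule: take a reference on oldpage before unlock_request() makes the request (and the page array holding that pointer) abortable, then drop that reference on every exit path via out_put_old. A generic sketch of the pattern, with struct some_obj, obj_unlock() and do_work() as placeholders rather than FUSE functions:

static int use_page_safely(struct page *page, struct some_obj *obj)
{
	int err;

	get_page(page);		/* pin the page while the pointer is still stable */
	obj_unlock(obj);	/* from here on, the original reference may go away */

	err = do_work(page);	/* safe: we hold our own reference */

	put_page(page);		/* drop the ref on every path, success or error */
	return err;
}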
static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
...@@ -883,14 +902,16 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, ...@@ -883,14 +902,16 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
if (cs->nr_segs >= cs->pipe->max_usage) if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO; return -EIO;
get_page(page);
err = unlock_request(cs->req); err = unlock_request(cs->req);
if (err) if (err) {
put_page(page);
return err; return err;
}
fuse_copy_finish(cs); fuse_copy_finish(cs);
buf = cs->pipebufs; buf = cs->pipebufs;
get_page(page);
buf->page = page; buf->page = page;
buf->offset = offset; buf->offset = offset;
buf->len = count; buf->len = count;
...@@ -1250,7 +1271,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, ...@@ -1250,7 +1271,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
/* SETXATTR is special, since it may contain too large data */ /* SETXATTR is special, since it may contain too large data */
if (args->opcode == FUSE_SETXATTR) if (args->opcode == FUSE_SETXATTR)
req->out.h.error = -E2BIG; req->out.h.error = -E2BIG;
fuse_request_end(fc, req); fuse_request_end(req);
goto restart; goto restart;
} }
spin_lock(&fpq->lock); spin_lock(&fpq->lock);
...@@ -1284,8 +1305,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, ...@@ -1284,8 +1305,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
/* matches barrier in request_wait_answer() */ /* matches barrier in request_wait_answer() */
smp_mb__after_atomic(); smp_mb__after_atomic();
if (test_bit(FR_INTERRUPTED, &req->flags)) if (test_bit(FR_INTERRUPTED, &req->flags))
queue_interrupt(fiq, req); queue_interrupt(req);
fuse_put_request(fc, req); fuse_put_request(req);
return reqsize; return reqsize;
...@@ -1293,7 +1314,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, ...@@ -1293,7 +1314,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
if (!test_bit(FR_PRIVATE, &req->flags)) if (!test_bit(FR_PRIVATE, &req->flags))
list_del_init(&req->list); list_del_init(&req->list);
spin_unlock(&fpq->lock); spin_unlock(&fpq->lock);
fuse_request_end(fc, req); fuse_request_end(req);
return err; return err;
err_unlock: err_unlock:
...@@ -1416,11 +1437,8 @@ static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, ...@@ -1416,11 +1437,8 @@ static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
fuse_copy_finish(cs); fuse_copy_finish(cs);
down_read(&fc->killsb); down_read(&fc->killsb);
err = -ENOENT; err = fuse_reverse_inval_inode(fc, outarg.ino,
if (fc->sb) {
err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
outarg.off, outarg.len); outarg.off, outarg.len);
}
up_read(&fc->killsb); up_read(&fc->killsb);
return err; return err;
...@@ -1466,9 +1484,7 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, ...@@ -1466,9 +1484,7 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
buf[outarg.namelen] = 0; buf[outarg.namelen] = 0;
down_read(&fc->killsb); down_read(&fc->killsb);
err = -ENOENT; err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name);
if (fc->sb)
err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
up_read(&fc->killsb); up_read(&fc->killsb);
kfree(buf); kfree(buf);
return err; return err;
...@@ -1516,10 +1532,7 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, ...@@ -1516,10 +1532,7 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
buf[outarg.namelen] = 0; buf[outarg.namelen] = 0;
down_read(&fc->killsb); down_read(&fc->killsb);
err = -ENOENT; err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name);
if (fc->sb)
err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
outarg.child, &name);
up_read(&fc->killsb); up_read(&fc->killsb);
kfree(buf); kfree(buf);
return err; return err;
...@@ -1561,10 +1574,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, ...@@ -1561,10 +1574,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
down_read(&fc->killsb); down_read(&fc->killsb);
err = -ENOENT; err = -ENOENT;
if (!fc->sb) inode = fuse_ilookup(fc, nodeid, NULL);
goto out_up_killsb;
inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
if (!inode) if (!inode)
goto out_up_killsb; goto out_up_killsb;
...@@ -1621,7 +1631,7 @@ struct fuse_retrieve_args { ...@@ -1621,7 +1631,7 @@ struct fuse_retrieve_args {
struct fuse_notify_retrieve_in inarg; struct fuse_notify_retrieve_in inarg;
}; };
static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args, static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
int error) int error)
{ {
struct fuse_retrieve_args *ra = struct fuse_retrieve_args *ra =
...@@ -1631,7 +1641,7 @@ static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args, ...@@ -1631,7 +1641,7 @@ static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_args *args,
kfree(ra); kfree(ra);
} }
static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
struct fuse_notify_retrieve_out *outarg) struct fuse_notify_retrieve_out *outarg)
{ {
int err; int err;
...@@ -1642,6 +1652,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, ...@@ -1642,6 +1652,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
unsigned int offset; unsigned int offset;
size_t total_len = 0; size_t total_len = 0;
unsigned int num_pages; unsigned int num_pages;
struct fuse_conn *fc = fm->fc;
struct fuse_retrieve_args *ra; struct fuse_retrieve_args *ra;
size_t args_size = sizeof(*ra); size_t args_size = sizeof(*ra);
struct fuse_args_pages *ap; struct fuse_args_pages *ap;
...@@ -1703,9 +1714,9 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, ...@@ -1703,9 +1714,9 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
args->in_args[0].value = &ra->inarg; args->in_args[0].value = &ra->inarg;
args->in_args[1].size = total_len; args->in_args[1].size = total_len;
err = fuse_simple_notify_reply(fc, args, outarg->notify_unique); err = fuse_simple_notify_reply(fm, args, outarg->notify_unique);
if (err) if (err)
fuse_retrieve_end(fc, args, err); fuse_retrieve_end(fm, args, err);
return err; return err;
} }
...@@ -1714,7 +1725,9 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, ...@@ -1714,7 +1725,9 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs) struct fuse_copy_state *cs)
{ {
struct fuse_notify_retrieve_out outarg; struct fuse_notify_retrieve_out outarg;
struct fuse_mount *fm;
struct inode *inode; struct inode *inode;
u64 nodeid;
int err; int err;
err = -EINVAL; err = -EINVAL;
...@@ -1729,15 +1742,13 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, ...@@ -1729,15 +1742,13 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
down_read(&fc->killsb); down_read(&fc->killsb);
err = -ENOENT; err = -ENOENT;
if (fc->sb) { nodeid = outarg.nodeid;
u64 nodeid = outarg.nodeid;
inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); inode = fuse_ilookup(fc, nodeid, &fm);
if (inode) { if (inode) {
err = fuse_retrieve(fc, inode, &outarg); err = fuse_retrieve(fm, inode, &outarg);
iput(inode); iput(inode);
} }
}
up_read(&fc->killsb); up_read(&fc->killsb);
return err; return err;
...@@ -1875,9 +1886,9 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, ...@@ -1875,9 +1886,9 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
else if (oh.error == -ENOSYS) else if (oh.error == -ENOSYS)
fc->no_interrupt = 1; fc->no_interrupt = 1;
else if (oh.error == -EAGAIN) else if (oh.error == -EAGAIN)
err = queue_interrupt(&fc->iq, req); err = queue_interrupt(req);
fuse_put_request(fc, req); fuse_put_request(req);
goto copy_finish; goto copy_finish;
} }
...@@ -1907,7 +1918,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, ...@@ -1907,7 +1918,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
list_del_init(&req->list); list_del_init(&req->list);
spin_unlock(&fpq->lock); spin_unlock(&fpq->lock);
fuse_request_end(fc, req); fuse_request_end(req);
out: out:
return err ? err : nbytes; return err ? err : nbytes;
...@@ -2045,7 +2056,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait) ...@@ -2045,7 +2056,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
} }
/* Abort all requests on the given list (pending or processing) */ /* Abort all requests on the given list (pending or processing) */
static void end_requests(struct fuse_conn *fc, struct list_head *head) static void end_requests(struct list_head *head)
{ {
while (!list_empty(head)) { while (!list_empty(head)) {
struct fuse_req *req; struct fuse_req *req;
...@@ -2053,7 +2064,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) ...@@ -2053,7 +2064,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
req->out.h.error = -ECONNABORTED; req->out.h.error = -ECONNABORTED;
clear_bit(FR_SENT, &req->flags); clear_bit(FR_SENT, &req->flags);
list_del_init(&req->list); list_del_init(&req->list);
fuse_request_end(fc, req); fuse_request_end(req);
} }
} }
...@@ -2148,7 +2159,7 @@ void fuse_abort_conn(struct fuse_conn *fc) ...@@ -2148,7 +2159,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
wake_up_all(&fc->blocked_waitq); wake_up_all(&fc->blocked_waitq);
spin_unlock(&fc->lock); spin_unlock(&fc->lock);
end_requests(fc, &to_end); end_requests(&to_end);
} else { } else {
spin_unlock(&fc->lock); spin_unlock(&fc->lock);
} }
...@@ -2178,7 +2189,7 @@ int fuse_dev_release(struct inode *inode, struct file *file) ...@@ -2178,7 +2189,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
list_splice_init(&fpq->processing[i], &to_end); list_splice_init(&fpq->processing[i], &to_end);
spin_unlock(&fpq->lock); spin_unlock(&fpq->lock);
end_requests(fc, &to_end); end_requests(&to_end);
/* Are we the last open device? */ /* Are we the last open device? */
if (atomic_dec_and_test(&fc->dev_count)) { if (atomic_dec_and_test(&fc->dev_count)) {
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/fs_context.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/slab.h> #include <linux/slab.h>
...@@ -196,7 +197,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) ...@@ -196,7 +197,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
{ {
struct inode *inode; struct inode *inode;
struct dentry *parent; struct dentry *parent;
struct fuse_conn *fc; struct fuse_mount *fm;
struct fuse_inode *fi; struct fuse_inode *fi;
int ret; int ret;
...@@ -218,27 +219,29 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) ...@@ -218,27 +219,29 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
if (flags & LOOKUP_RCU) if (flags & LOOKUP_RCU)
goto out; goto out;
fc = get_fuse_conn(inode); fm = get_fuse_mount(inode);
forget = fuse_alloc_forget(); forget = fuse_alloc_forget();
ret = -ENOMEM; ret = -ENOMEM;
if (!forget) if (!forget)
goto out; goto out;
attr_version = fuse_get_attr_version(fc); attr_version = fuse_get_attr_version(fm->fc);
parent = dget_parent(entry); parent = dget_parent(entry);
fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)), fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
&entry->d_name, &outarg); &entry->d_name, &outarg);
ret = fuse_simple_request(fc, &args); ret = fuse_simple_request(fm, &args);
dput(parent); dput(parent);
/* Zero nodeid is same as -ENOENT */ /* Zero nodeid is same as -ENOENT */
if (!ret && !outarg.nodeid) if (!ret && !outarg.nodeid)
ret = -ENOENT; ret = -ENOENT;
if (!ret) { if (!ret) {
fi = get_fuse_inode(inode); fi = get_fuse_inode(inode);
if (outarg.nodeid != get_node_id(inode)) { if (outarg.nodeid != get_node_id(inode) ||
fuse_queue_forget(fc, forget, outarg.nodeid, 1); (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
fuse_queue_forget(fm->fc, forget,
outarg.nodeid, 1);
goto invalid; goto invalid;
} }
spin_lock(&fi->lock); spin_lock(&fi->lock);
...@@ -298,6 +301,79 @@ static int fuse_dentry_delete(const struct dentry *dentry) ...@@ -298,6 +301,79 @@ static int fuse_dentry_delete(const struct dentry *dentry)
return time_before64(fuse_dentry_time(dentry), get_jiffies_64()); return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
} }
/*
* Create a fuse_mount object with a new superblock (with path->dentry
* as the root), and return that mount so it can be auto-mounted on
* @path.
*/
static struct vfsmount *fuse_dentry_automount(struct path *path)
{
struct fs_context *fsc;
struct fuse_mount *parent_fm = get_fuse_mount_super(path->mnt->mnt_sb);
struct fuse_conn *fc = parent_fm->fc;
struct fuse_mount *fm;
struct vfsmount *mnt;
struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
struct super_block *sb;
int err;
fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
if (IS_ERR(fsc)) {
err = PTR_ERR(fsc);
goto out;
}
err = -ENOMEM;
fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
if (!fm)
goto out_put_fsc;
refcount_set(&fm->count, 1);
fsc->s_fs_info = fm;
sb = sget_fc(fsc, NULL, set_anon_super_fc);
if (IS_ERR(sb)) {
err = PTR_ERR(sb);
fuse_mount_put(fm);
goto out_put_fsc;
}
fm->fc = fuse_conn_get(fc);
/* Initialize superblock, making @mp_fi its root */
err = fuse_fill_super_submount(sb, mp_fi);
if (err)
goto out_put_sb;
sb->s_flags |= SB_ACTIVE;
fsc->root = dget(sb->s_root);
/* We are done configuring the superblock, so unlock it */
up_write(&sb->s_umount);
down_write(&fc->killsb);
list_add_tail(&fm->fc_entry, &fc->mounts);
up_write(&fc->killsb);
/* Create the submount */
mnt = vfs_create_mount(fsc);
if (IS_ERR(mnt)) {
err = PTR_ERR(mnt);
goto out_put_fsc;
}
mntget(mnt);
put_fs_context(fsc);
return mnt;
out_put_sb:
/*
* Only jump here when fsc->root is NULL and sb is still locked
* (otherwise put_fs_context() will put the superblock)
*/
deactivate_locked_super(sb);
out_put_fsc:
put_fs_context(fsc);
out:
return ERR_PTR(err);
}
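fuse_dentry_automount() above only runs for dentries whose inode the server flagged as a submount: the LOOKUP/GETATTR reply carries FUSE_ATTR_SUBMOUNT in fuse_attr.flags, and on connections that enable submounts (virtiofs in this series) the kernel marks the inode S_AUTOMOUNT. A rough server-side sketch at the raw protocol level; fill_submount_entry() and its arguments are hypothetical, while the struct and flag come from <uapi/linux/fuse.h>:

#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <linux/fuse.h>		/* struct fuse_entry_out, FUSE_ATTR_SUBMOUNT */

/* Hypothetical helper for a FUSE/virtiofs daemon answering LOOKUP on a
 * directory that is a mount point on the host. */
static void fill_submount_entry(struct fuse_entry_out *out, uint64_t nodeid,
				const struct stat *st)
{
	memset(out, 0, sizeof(*out));
	out->nodeid = nodeid;
	out->attr.ino = st->st_ino;
	out->attr.mode = st->st_mode;
	out->attr.nlink = st->st_nlink;
	/* Ask the client to auto-mount a separate superblock here, so the
	 * guest sees distinct st_dev/st_ino values for this subtree. */
	out->attr.flags = FUSE_ATTR_SUBMOUNT;
}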
const struct dentry_operations fuse_dentry_operations = { const struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate, .d_revalidate = fuse_dentry_revalidate,
.d_delete = fuse_dentry_delete, .d_delete = fuse_dentry_delete,
...@@ -305,6 +381,7 @@ const struct dentry_operations fuse_dentry_operations = { ...@@ -305,6 +381,7 @@ const struct dentry_operations fuse_dentry_operations = {
.d_init = fuse_dentry_init, .d_init = fuse_dentry_init,
.d_release = fuse_dentry_release, .d_release = fuse_dentry_release,
#endif #endif
.d_automount = fuse_dentry_automount,
}; };
const struct dentry_operations fuse_root_dentry_operations = { const struct dentry_operations fuse_root_dentry_operations = {
...@@ -329,7 +406,7 @@ bool fuse_invalid_attr(struct fuse_attr *attr) ...@@ -329,7 +406,7 @@ bool fuse_invalid_attr(struct fuse_attr *attr)
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode) struct fuse_entry_out *outarg, struct inode **inode)
{ {
struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_mount *fm = get_fuse_mount_super(sb);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_forget_link *forget; struct fuse_forget_link *forget;
u64 attr_version; u64 attr_version;
...@@ -346,10 +423,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name ...@@ -346,10 +423,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
if (!forget) if (!forget)
goto out; goto out;
attr_version = fuse_get_attr_version(fc); attr_version = fuse_get_attr_version(fm->fc);
fuse_lookup_init(fc, &args, nodeid, name, outarg); fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
/* Zero nodeid is same as -ENOENT, but with valid timeout */ /* Zero nodeid is same as -ENOENT, but with valid timeout */
if (err || !outarg->nodeid) if (err || !outarg->nodeid)
goto out_put_forget; goto out_put_forget;
...@@ -365,7 +442,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name ...@@ -365,7 +442,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
attr_version); attr_version);
err = -ENOMEM; err = -ENOMEM;
if (!*inode) { if (!*inode) {
fuse_queue_forget(fc, forget, outarg->nodeid, 1); fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
goto out; goto out;
} }
err = 0; err = 0;
...@@ -434,7 +511,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, ...@@ -434,7 +511,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
{ {
int err; int err;
struct inode *inode; struct inode *inode;
struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_forget_link *forget; struct fuse_forget_link *forget;
struct fuse_create_in inarg; struct fuse_create_in inarg;
...@@ -452,11 +529,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, ...@@ -452,11 +529,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
goto out_err; goto out_err;
err = -ENOMEM; err = -ENOMEM;
ff = fuse_file_alloc(fc); ff = fuse_file_alloc(fm);
if (!ff) if (!ff)
goto out_put_forget_req; goto out_put_forget_req;
if (!fc->dont_mask) if (!fm->fc->dont_mask)
mode &= ~current_umask(); mode &= ~current_umask();
flags &= ~O_NOCTTY; flags &= ~O_NOCTTY;
...@@ -477,7 +554,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, ...@@ -477,7 +554,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
args.out_args[0].value = &outentry; args.out_args[0].value = &outentry;
args.out_args[1].size = sizeof(outopen); args.out_args[1].size = sizeof(outopen);
args.out_args[1].value = &outopen; args.out_args[1].value = &outopen;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (err) if (err)
goto out_free_ff; goto out_free_ff;
...@@ -494,7 +571,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, ...@@ -494,7 +571,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (!inode) { if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC); flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
fuse_sync_release(NULL, ff, flags); fuse_sync_release(NULL, ff, flags);
fuse_queue_forget(fc, forget, outentry.nodeid, 1); fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
err = -ENOMEM; err = -ENOMEM;
goto out_err; goto out_err;
} }
...@@ -567,7 +644,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, ...@@ -567,7 +644,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
/* /*
* Code shared between mknod, mkdir, symlink and link * Code shared between mknod, mkdir, symlink and link
*/ */
static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
struct inode *dir, struct dentry *entry, struct inode *dir, struct dentry *entry,
umode_t mode) umode_t mode)
{ {
...@@ -586,7 +663,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, ...@@ -586,7 +663,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
args->out_numargs = 1; args->out_numargs = 1;
args->out_args[0].size = sizeof(outarg); args->out_args[0].size = sizeof(outarg);
args->out_args[0].value = &outarg; args->out_args[0].value = &outarg;
err = fuse_simple_request(fc, args); err = fuse_simple_request(fm, args);
if (err) if (err)
goto out_put_forget_req; goto out_put_forget_req;
...@@ -600,7 +677,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, ...@@ -600,7 +677,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
&outarg.attr, entry_attr_timeout(&outarg), 0); &outarg.attr, entry_attr_timeout(&outarg), 0);
if (!inode) { if (!inode) {
fuse_queue_forget(fc, forget, outarg.nodeid, 1); fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
return -ENOMEM; return -ENOMEM;
} }
kfree(forget); kfree(forget);
...@@ -628,10 +705,10 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, ...@@ -628,10 +705,10 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
dev_t rdev) dev_t rdev)
{ {
struct fuse_mknod_in inarg; struct fuse_mknod_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args); FUSE_ARGS(args);
if (!fc->dont_mask) if (!fm->fc->dont_mask)
mode &= ~current_umask(); mode &= ~current_umask();
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
...@@ -644,7 +721,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, ...@@ -644,7 +721,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
args.in_args[0].value = &inarg; args.in_args[0].value = &inarg;
args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].size = entry->d_name.len + 1;
args.in_args[1].value = entry->d_name.name; args.in_args[1].value = entry->d_name.name;
return create_new_entry(fc, &args, dir, entry, mode); return create_new_entry(fm, &args, dir, entry, mode);
} }
static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
...@@ -656,10 +733,10 @@ static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, ...@@ -656,10 +733,10 @@ static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
{ {
struct fuse_mkdir_in inarg; struct fuse_mkdir_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args); FUSE_ARGS(args);
if (!fc->dont_mask) if (!fm->fc->dont_mask)
mode &= ~current_umask(); mode &= ~current_umask();
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
...@@ -671,13 +748,13 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) ...@@ -671,13 +748,13 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
args.in_args[0].value = &inarg; args.in_args[0].value = &inarg;
args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].size = entry->d_name.len + 1;
args.in_args[1].value = entry->d_name.name; args.in_args[1].value = entry->d_name.name;
return create_new_entry(fc, &args, dir, entry, S_IFDIR); return create_new_entry(fm, &args, dir, entry, S_IFDIR);
} }
static int fuse_symlink(struct inode *dir, struct dentry *entry, static int fuse_symlink(struct inode *dir, struct dentry *entry,
const char *link) const char *link)
{ {
struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_mount *fm = get_fuse_mount(dir);
unsigned len = strlen(link) + 1; unsigned len = strlen(link) + 1;
FUSE_ARGS(args); FUSE_ARGS(args);
...@@ -687,7 +764,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, ...@@ -687,7 +764,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
args.in_args[0].value = entry->d_name.name; args.in_args[0].value = entry->d_name.name;
args.in_args[1].size = len; args.in_args[1].size = len;
args.in_args[1].value = link; args.in_args[1].value = link;
return create_new_entry(fc, &args, dir, entry, S_IFLNK); return create_new_entry(fm, &args, dir, entry, S_IFLNK);
} }
void fuse_update_ctime(struct inode *inode) void fuse_update_ctime(struct inode *inode)
...@@ -701,7 +778,7 @@ void fuse_update_ctime(struct inode *inode) ...@@ -701,7 +778,7 @@ void fuse_update_ctime(struct inode *inode)
static int fuse_unlink(struct inode *dir, struct dentry *entry) static int fuse_unlink(struct inode *dir, struct dentry *entry)
{ {
int err; int err;
struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args); FUSE_ARGS(args);
args.opcode = FUSE_UNLINK; args.opcode = FUSE_UNLINK;
...@@ -709,13 +786,13 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) ...@@ -709,13 +786,13 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
args.in_numargs = 1; args.in_numargs = 1;
args.in_args[0].size = entry->d_name.len + 1; args.in_args[0].size = entry->d_name.len + 1;
args.in_args[0].value = entry->d_name.name; args.in_args[0].value = entry->d_name.name;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (!err) { if (!err) {
struct inode *inode = d_inode(entry); struct inode *inode = d_inode(entry);
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
spin_lock(&fi->lock); spin_lock(&fi->lock);
fi->attr_version = atomic64_inc_return(&fc->attr_version); fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
/* /*
* If i_nlink == 0 then unlink doesn't make sense, yet this can * If i_nlink == 0 then unlink doesn't make sense, yet this can
* happen if userspace filesystem is careless. It would be * happen if userspace filesystem is careless. It would be
...@@ -737,7 +814,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) ...@@ -737,7 +814,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
static int fuse_rmdir(struct inode *dir, struct dentry *entry) static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{ {
int err; int err;
struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args); FUSE_ARGS(args);
args.opcode = FUSE_RMDIR; args.opcode = FUSE_RMDIR;
...@@ -745,7 +822,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) ...@@ -745,7 +822,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
args.in_numargs = 1; args.in_numargs = 1;
args.in_args[0].size = entry->d_name.len + 1; args.in_args[0].size = entry->d_name.len + 1;
args.in_args[0].value = entry->d_name.name; args.in_args[0].value = entry->d_name.name;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (!err) { if (!err) {
clear_nlink(d_inode(entry)); clear_nlink(d_inode(entry));
fuse_dir_changed(dir); fuse_dir_changed(dir);
...@@ -761,7 +838,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, ...@@ -761,7 +838,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
{ {
int err; int err;
struct fuse_rename2_in inarg; struct fuse_rename2_in inarg;
struct fuse_conn *fc = get_fuse_conn(olddir); struct fuse_mount *fm = get_fuse_mount(olddir);
FUSE_ARGS(args); FUSE_ARGS(args);
memset(&inarg, 0, argsize); memset(&inarg, 0, argsize);
...@@ -776,7 +853,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, ...@@ -776,7 +853,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
args.in_args[1].value = oldent->d_name.name; args.in_args[1].value = oldent->d_name.name;
args.in_args[2].size = newent->d_name.len + 1; args.in_args[2].size = newent->d_name.len + 1;
args.in_args[2].value = newent->d_name.name; args.in_args[2].value = newent->d_name.name;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (!err) { if (!err) {
/* ctime changes */ /* ctime changes */
fuse_invalidate_attr(d_inode(oldent)); fuse_invalidate_attr(d_inode(oldent));
...@@ -847,7 +924,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, ...@@ -847,7 +924,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
int err; int err;
struct fuse_link_in inarg; struct fuse_link_in inarg;
struct inode *inode = d_inode(entry); struct inode *inode = d_inode(entry);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
...@@ -858,7 +935,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, ...@@ -858,7 +935,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
args.in_args[0].value = &inarg; args.in_args[0].value = &inarg;
args.in_args[1].size = newent->d_name.len + 1; args.in_args[1].size = newent->d_name.len + 1;
args.in_args[1].value = newent->d_name.name; args.in_args[1].value = newent->d_name.name;
err = create_new_entry(fc, &args, newdir, newent, inode->i_mode); err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
/* Contrary to "normal" filesystems it can happen that link /* Contrary to "normal" filesystems it can happen that link
makes two "logical" inodes point to the same "physical" makes two "logical" inodes point to the same "physical"
inode. We invalidate the attributes of the old one, so it inode. We invalidate the attributes of the old one, so it
...@@ -869,7 +946,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, ...@@ -869,7 +946,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
spin_lock(&fi->lock); spin_lock(&fi->lock);
fi->attr_version = atomic64_inc_return(&fc->attr_version); fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
if (likely(inode->i_nlink < UINT_MAX)) if (likely(inode->i_nlink < UINT_MAX))
inc_nlink(inode); inc_nlink(inode);
spin_unlock(&fi->lock); spin_unlock(&fi->lock);
...@@ -926,11 +1003,11 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, ...@@ -926,11 +1003,11 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
int err; int err;
struct fuse_getattr_in inarg; struct fuse_getattr_in inarg;
struct fuse_attr_out outarg; struct fuse_attr_out outarg;
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
u64 attr_version; u64 attr_version;
attr_version = fuse_get_attr_version(fc); attr_version = fuse_get_attr_version(fm->fc);
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg)); memset(&outarg, 0, sizeof(outarg));
...@@ -949,7 +1026,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, ...@@ -949,7 +1026,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
args.out_numargs = 1; args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg); args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg; args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (!err) { if (!err) {
if (fuse_invalid_attr(&outarg.attr) || if (fuse_invalid_attr(&outarg.attr) ||
(inode->i_mode ^ outarg.attr.mode) & S_IFMT) { (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
...@@ -1002,7 +1079,7 @@ int fuse_update_attributes(struct inode *inode, struct file *file) ...@@ -1002,7 +1079,7 @@ int fuse_update_attributes(struct inode *inode, struct file *file)
STATX_BASIC_STATS & ~STATX_ATIME, 0); STATX_BASIC_STATS & ~STATX_ATIME, 0);
} }
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
u64 child_nodeid, struct qstr *name) u64 child_nodeid, struct qstr *name)
{ {
int err = -ENOTDIR; int err = -ENOTDIR;
...@@ -1010,7 +1087,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, ...@@ -1010,7 +1087,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
struct dentry *dir; struct dentry *dir;
struct dentry *entry; struct dentry *entry;
parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid); parent = fuse_ilookup(fc, parent_nodeid, NULL);
if (!parent) if (!parent)
return -ENOENT; return -ENOENT;
...@@ -1102,14 +1179,14 @@ int fuse_allow_current_process(struct fuse_conn *fc) ...@@ -1102,14 +1179,14 @@ int fuse_allow_current_process(struct fuse_conn *fc)
static int fuse_access(struct inode *inode, int mask) static int fuse_access(struct inode *inode, int mask)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_access_in inarg; struct fuse_access_in inarg;
int err; int err;
BUG_ON(mask & MAY_NOT_BLOCK); BUG_ON(mask & MAY_NOT_BLOCK);
if (fc->no_access) if (fm->fc->no_access)
return 0; return 0;
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
...@@ -1119,9 +1196,9 @@ static int fuse_access(struct inode *inode, int mask) ...@@ -1119,9 +1196,9 @@ static int fuse_access(struct inode *inode, int mask)
args.in_numargs = 1; args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg); args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg; args.in_args[0].value = &inarg;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) { if (err == -ENOSYS) {
fc->no_access = 1; fm->fc->no_access = 1;
err = 0; err = 0;
} }
return err; return err;
...@@ -1209,7 +1286,7 @@ static int fuse_permission(struct inode *inode, int mask) ...@@ -1209,7 +1286,7 @@ static int fuse_permission(struct inode *inode, int mask)
static int fuse_readlink_page(struct inode *inode, struct page *page) static int fuse_readlink_page(struct inode *inode, struct page *page)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 }; struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
struct fuse_args_pages ap = { struct fuse_args_pages ap = {
.num_pages = 1, .num_pages = 1,
...@@ -1226,7 +1303,7 @@ static int fuse_readlink_page(struct inode *inode, struct page *page) ...@@ -1226,7 +1303,7 @@ static int fuse_readlink_page(struct inode *inode, struct page *page)
ap.args.page_zeroing = true; ap.args.page_zeroing = true;
ap.args.out_numargs = 1; ap.args.out_numargs = 1;
ap.args.out_args[0].size = desc.length; ap.args.out_args[0].size = desc.length;
res = fuse_simple_request(fc, &ap.args); res = fuse_simple_request(fm, &ap.args);
fuse_invalidate_atime(inode); fuse_invalidate_atime(inode);
...@@ -1454,7 +1531,7 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, ...@@ -1454,7 +1531,7 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
*/ */
int fuse_flush_times(struct inode *inode, struct fuse_file *ff) int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_setattr_in inarg; struct fuse_setattr_in inarg;
struct fuse_attr_out outarg; struct fuse_attr_out outarg;
...@@ -1465,7 +1542,7 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff) ...@@ -1465,7 +1542,7 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
inarg.valid = FATTR_MTIME; inarg.valid = FATTR_MTIME;
inarg.mtime = inode->i_mtime.tv_sec; inarg.mtime = inode->i_mtime.tv_sec;
inarg.mtimensec = inode->i_mtime.tv_nsec; inarg.mtimensec = inode->i_mtime.tv_nsec;
if (fc->minor >= 23) { if (fm->fc->minor >= 23) {
inarg.valid |= FATTR_CTIME; inarg.valid |= FATTR_CTIME;
inarg.ctime = inode->i_ctime.tv_sec; inarg.ctime = inode->i_ctime.tv_sec;
inarg.ctimensec = inode->i_ctime.tv_nsec; inarg.ctimensec = inode->i_ctime.tv_nsec;
...@@ -1474,9 +1551,9 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff) ...@@ -1474,9 +1551,9 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
inarg.valid |= FATTR_FH; inarg.valid |= FATTR_FH;
inarg.fh = ff->fh; inarg.fh = ff->fh;
} }
fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
return fuse_simple_request(fc, &args); return fuse_simple_request(fm, &args);
} }
/* /*
...@@ -1491,7 +1568,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, ...@@ -1491,7 +1568,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
struct file *file) struct file *file)
{ {
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(dentry);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_conn *fc = fm->fc;
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_setattr_in inarg; struct fuse_setattr_in inarg;
...@@ -1501,6 +1579,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, ...@@ -1501,6 +1579,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
loff_t oldsize; loff_t oldsize;
int err; int err;
bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
bool fault_blocked = false;
if (!fc->default_permissions) if (!fc->default_permissions)
attr->ia_valid |= ATTR_FORCE; attr->ia_valid |= ATTR_FORCE;
...@@ -1509,6 +1588,22 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, ...@@ -1509,6 +1588,22 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
if (err) if (err)
return err; return err;
if (attr->ia_valid & ATTR_SIZE) {
if (WARN_ON(!S_ISREG(inode->i_mode)))
return -EIO;
is_truncate = true;
}
if (FUSE_IS_DAX(inode) && is_truncate) {
down_write(&fi->i_mmap_sem);
fault_blocked = true;
err = fuse_dax_break_layouts(inode, 0, 0);
if (err) {
up_write(&fi->i_mmap_sem);
return err;
}
}
if (attr->ia_valid & ATTR_OPEN) { if (attr->ia_valid & ATTR_OPEN) {
/* This is coming from open(..., ... | O_TRUNC); */ /* This is coming from open(..., ... | O_TRUNC); */
WARN_ON(!(attr->ia_valid & ATTR_SIZE)); WARN_ON(!(attr->ia_valid & ATTR_SIZE));
...@@ -1521,17 +1616,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, ...@@ -1521,17 +1616,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
*/ */
i_size_write(inode, 0); i_size_write(inode, 0);
truncate_pagecache(inode, 0); truncate_pagecache(inode, 0);
return 0; goto out;
} }
file = NULL; file = NULL;
} }
if (attr->ia_valid & ATTR_SIZE) {
if (WARN_ON(!S_ISREG(inode->i_mode)))
return -EIO;
is_truncate = true;
}
/* Flush dirty data/metadata before non-truncate SETATTR */ /* Flush dirty data/metadata before non-truncate SETATTR */
if (is_wb && S_ISREG(inode->i_mode) && if (is_wb && S_ISREG(inode->i_mode) &&
attr->ia_valid & attr->ia_valid &
...@@ -1566,7 +1655,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, ...@@ -1566,7 +1655,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
inarg.lock_owner = fuse_lock_owner_id(fc, current->files); inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
} }
fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (err) { if (err) {
if (err == -EINTR) if (err == -EINTR)
fuse_invalidate_attr(inode); fuse_invalidate_attr(inode);
...@@ -1614,6 +1703,10 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, ...@@ -1614,6 +1703,10 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
} }
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
out:
if (fault_blocked)
up_write(&fi->i_mmap_sem);
return 0; return 0;
error: error:
...@@ -1621,6 +1714,9 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, ...@@ -1621,6 +1714,9 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
fuse_release_nowrite(inode); fuse_release_nowrite(inode);
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
if (fault_blocked)
up_write(&fi->i_mmap_sem);
return err; return err;
} }
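The fault_blocked handling above is one half of the truncate-vs-DAX-fault serialization introduced for virtiofs: the size-changing path takes fi->i_mmap_sem for write and calls fuse_dax_break_layouts() to wait for busy DAX ranges, while the fault path holds the same rwsem for read. A reduced sketch of the locking pattern only; resize_inode() and dax_fault_handler() are placeholders, not FUSE functions:

static int truncate_side(struct inode *inode, loff_t newsize)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	int err;

	down_write(&fi->i_mmap_sem);			/* block new DAX faults */
	err = fuse_dax_break_layouts(inode, 0, 0);	/* wait out busy ranges */
	if (!err)
		err = resize_inode(inode, newsize);	/* placeholder for the SETATTR */
	up_write(&fi->i_mmap_sem);
	return err;
}

static vm_fault_t fault_side(struct vm_fault *vmf, struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	vm_fault_t ret;

	down_read(&fi->i_mmap_sem);		/* exclude a concurrent truncate */
	ret = dax_fault_handler(vmf);		/* placeholder for the DAX fault */
	up_read(&fi->i_mmap_sem);
	return ret;
}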
......
...@@ -32,7 +32,7 @@ static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags, ...@@ -32,7 +32,7 @@ static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
return pages; return pages;
} }
static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
int opcode, struct fuse_open_out *outargp) int opcode, struct fuse_open_out *outargp)
{ {
struct fuse_open_in inarg; struct fuse_open_in inarg;
...@@ -40,7 +40,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, ...@@ -40,7 +40,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
if (!fc->atomic_o_trunc) if (!fm->fc->atomic_o_trunc)
inarg.flags &= ~O_TRUNC; inarg.flags &= ~O_TRUNC;
args.opcode = opcode; args.opcode = opcode;
args.nodeid = nodeid; args.nodeid = nodeid;
...@@ -51,7 +51,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, ...@@ -51,7 +51,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
args.out_args[0].size = sizeof(*outargp); args.out_args[0].size = sizeof(*outargp);
args.out_args[0].value = outargp; args.out_args[0].value = outargp;
return fuse_simple_request(fc, &args); return fuse_simple_request(fm, &args);
} }
struct fuse_release_args { struct fuse_release_args {
...@@ -60,7 +60,7 @@ struct fuse_release_args { ...@@ -60,7 +60,7 @@ struct fuse_release_args {
struct inode *inode; struct inode *inode;
}; };
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
{ {
struct fuse_file *ff; struct fuse_file *ff;
...@@ -68,7 +68,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) ...@@ -68,7 +68,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
if (unlikely(!ff)) if (unlikely(!ff))
return NULL; return NULL;
ff->fc = fc; ff->fm = fm;
ff->release_args = kzalloc(sizeof(*ff->release_args), ff->release_args = kzalloc(sizeof(*ff->release_args),
GFP_KERNEL_ACCOUNT); GFP_KERNEL_ACCOUNT);
if (!ff->release_args) { if (!ff->release_args) {
...@@ -82,7 +82,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) ...@@ -82,7 +82,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
RB_CLEAR_NODE(&ff->polled_node); RB_CLEAR_NODE(&ff->polled_node);
init_waitqueue_head(&ff->poll_wait); init_waitqueue_head(&ff->poll_wait);
ff->kh = atomic64_inc_return(&fc->khctr); ff->kh = atomic64_inc_return(&fm->fc->khctr);
return ff; return ff;
} }
...@@ -100,7 +100,7 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff) ...@@ -100,7 +100,7 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff)
return ff; return ff;
} }
static void fuse_release_end(struct fuse_conn *fc, struct fuse_args *args, static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
int error) int error)
{ {
struct fuse_release_args *ra = container_of(args, typeof(*ra), args); struct fuse_release_args *ra = container_of(args, typeof(*ra), args);
...@@ -114,29 +114,30 @@ static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir) ...@@ -114,29 +114,30 @@ static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
if (refcount_dec_and_test(&ff->count)) { if (refcount_dec_and_test(&ff->count)) {
struct fuse_args *args = &ff->release_args->args; struct fuse_args *args = &ff->release_args->args;
if (isdir ? ff->fc->no_opendir : ff->fc->no_open) { if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
/* Do nothing when client does not implement 'open' */ /* Do nothing when client does not implement 'open' */
fuse_release_end(ff->fc, args, 0); fuse_release_end(ff->fm, args, 0);
} else if (sync) { } else if (sync) {
fuse_simple_request(ff->fc, args); fuse_simple_request(ff->fm, args);
fuse_release_end(ff->fc, args, 0); fuse_release_end(ff->fm, args, 0);
} else { } else {
args->end = fuse_release_end; args->end = fuse_release_end;
if (fuse_simple_background(ff->fc, args, if (fuse_simple_background(ff->fm, args,
GFP_KERNEL | __GFP_NOFAIL)) GFP_KERNEL | __GFP_NOFAIL))
fuse_release_end(ff->fc, args, -ENOTCONN); fuse_release_end(ff->fm, args, -ENOTCONN);
} }
kfree(ff); kfree(ff);
} }
} }
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
bool isdir) bool isdir)
{ {
struct fuse_conn *fc = fm->fc;
struct fuse_file *ff; struct fuse_file *ff;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
ff = fuse_file_alloc(fc); ff = fuse_file_alloc(fm);
if (!ff) if (!ff)
return -ENOMEM; return -ENOMEM;
...@@ -147,7 +148,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, ...@@ -147,7 +148,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
struct fuse_open_out outarg; struct fuse_open_out outarg;
int err; int err;
err = fuse_send_open(fc, nodeid, file, opcode, &outarg); err = fuse_send_open(fm, nodeid, file, opcode, &outarg);
if (!err) { if (!err) {
ff->fh = outarg.fh; ff->fh = outarg.fh;
ff->open_flags = outarg.open_flags; ff->open_flags = outarg.open_flags;
...@@ -216,27 +217,40 @@ void fuse_finish_open(struct inode *inode, struct file *file) ...@@ -216,27 +217,40 @@ void fuse_finish_open(struct inode *inode, struct file *file)
int fuse_open_common(struct inode *inode, struct file *file, bool isdir) int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_conn *fc = fm->fc;
int err; int err;
bool is_wb_truncate = (file->f_flags & O_TRUNC) && bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
fc->atomic_o_trunc && fc->atomic_o_trunc &&
fc->writeback_cache; fc->writeback_cache;
bool dax_truncate = (file->f_flags & O_TRUNC) &&
fc->atomic_o_trunc && FUSE_IS_DAX(inode);
err = generic_file_open(inode, file); err = generic_file_open(inode, file);
if (err) if (err)
return err; return err;
if (is_wb_truncate) { if (is_wb_truncate || dax_truncate) {
inode_lock(inode); inode_lock(inode);
fuse_set_nowrite(inode); fuse_set_nowrite(inode);
} }
err = fuse_do_open(fc, get_node_id(inode), file, isdir); if (dax_truncate) {
down_write(&get_fuse_inode(inode)->i_mmap_sem);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
goto out;
}
err = fuse_do_open(fm, get_node_id(inode), file, isdir);
if (!err) if (!err)
fuse_finish_open(inode, file); fuse_finish_open(inode, file);
if (is_wb_truncate) { out:
if (dax_truncate)
up_write(&get_fuse_inode(inode)->i_mmap_sem);
if (is_wb_truncate | dax_truncate) {
fuse_release_nowrite(inode); fuse_release_nowrite(inode);
inode_unlock(inode); inode_unlock(inode);
} }
...@@ -247,7 +261,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) ...@@ -247,7 +261,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
int flags, int opcode) int flags, int opcode)
{ {
struct fuse_conn *fc = ff->fc; struct fuse_conn *fc = ff->fm->fc;
struct fuse_release_args *ra = ff->release_args; struct fuse_release_args *ra = ff->release_args;
/* Inode is NULL on error path of fuse_create_open() */ /* Inode is NULL on error path of fuse_create_open() */
...@@ -285,7 +299,7 @@ void fuse_release_common(struct file *file, bool isdir) ...@@ -285,7 +299,7 @@ void fuse_release_common(struct file *file, bool isdir)
if (ff->flock) { if (ff->flock) {
ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
ra->inarg.lock_owner = fuse_lock_owner_id(ff->fc, ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc,
(fl_owner_t) file); (fl_owner_t) file);
} }
/* Hold inode until release is finished */ /* Hold inode until release is finished */
...@@ -300,7 +314,7 @@ void fuse_release_common(struct file *file, bool isdir) ...@@ -300,7 +314,7 @@ void fuse_release_common(struct file *file, bool isdir)
* synchronous RELEASE is allowed (and desirable) in this case * synchronous RELEASE is allowed (and desirable) in this case
* because the server can be trusted not to screw up. * because the server can be trusted not to screw up.
*/ */
fuse_file_put(ff, ff->fc->destroy, isdir); fuse_file_put(ff, ff->fm->fc->destroy, isdir);
} }
static int fuse_open(struct inode *inode, struct file *file) static int fuse_open(struct inode *inode, struct file *file)
...@@ -443,7 +457,7 @@ static void fuse_sync_writes(struct inode *inode) ...@@ -443,7 +457,7 @@ static void fuse_sync_writes(struct inode *inode)
static int fuse_flush(struct file *file, fl_owner_t id) static int fuse_flush(struct file *file, fl_owner_t id)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct fuse_flush_in inarg; struct fuse_flush_in inarg;
FUSE_ARGS(args); FUSE_ARGS(args);
...@@ -465,12 +479,12 @@ static int fuse_flush(struct file *file, fl_owner_t id) ...@@ -465,12 +479,12 @@ static int fuse_flush(struct file *file, fl_owner_t id)
return err; return err;
err = 0; err = 0;
if (fc->no_flush) if (fm->fc->no_flush)
goto inval_attr_out; goto inval_attr_out;
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh; inarg.fh = ff->fh;
inarg.lock_owner = fuse_lock_owner_id(fc, id); inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
args.opcode = FUSE_FLUSH; args.opcode = FUSE_FLUSH;
args.nodeid = get_node_id(inode); args.nodeid = get_node_id(inode);
args.in_numargs = 1; args.in_numargs = 1;
...@@ -478,9 +492,9 @@ static int fuse_flush(struct file *file, fl_owner_t id) ...@@ -478,9 +492,9 @@ static int fuse_flush(struct file *file, fl_owner_t id)
args.in_args[0].value = &inarg; args.in_args[0].value = &inarg;
args.force = true; args.force = true;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) { if (err == -ENOSYS) {
fc->no_flush = 1; fm->fc->no_flush = 1;
err = 0; err = 0;
} }
...@@ -489,7 +503,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) ...@@ -489,7 +503,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
* In memory i_blocks is not maintained by fuse, if writeback cache is * In memory i_blocks is not maintained by fuse, if writeback cache is
* enabled, i_blocks from cached attr may not be accurate. * enabled, i_blocks from cached attr may not be accurate.
*/ */
if (!err && fc->writeback_cache) if (!err && fm->fc->writeback_cache)
fuse_invalidate_attr(inode); fuse_invalidate_attr(inode);
return err; return err;
} }
...@@ -498,7 +512,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, ...@@ -498,7 +512,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
int datasync, int opcode) int datasync, int opcode)
{ {
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_fsync_in inarg; struct fuse_fsync_in inarg;
...@@ -511,7 +525,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, ...@@ -511,7 +525,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
args.in_numargs = 1; args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg); args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg; args.in_args[0].value = &inarg;
return fuse_simple_request(fc, &args); return fuse_simple_request(fm, &args);
} }
static int fuse_fsync(struct file *file, loff_t start, loff_t end, static int fuse_fsync(struct file *file, loff_t start, loff_t end,
...@@ -686,7 +700,7 @@ static void fuse_io_free(struct fuse_io_args *ia) ...@@ -686,7 +700,7 @@ static void fuse_io_free(struct fuse_io_args *ia)
kfree(ia); kfree(ia);
} }
static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args, static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
int err) int err)
{ {
struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
...@@ -715,7 +729,7 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args, ...@@ -715,7 +729,7 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_args *args,
fuse_io_free(ia); fuse_io_free(ia);
} }
static ssize_t fuse_async_req_send(struct fuse_conn *fc, static ssize_t fuse_async_req_send(struct fuse_mount *fm,
struct fuse_io_args *ia, size_t num_bytes) struct fuse_io_args *ia, size_t num_bytes)
{ {
ssize_t err; ssize_t err;
...@@ -729,9 +743,9 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc, ...@@ -729,9 +743,9 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc,
ia->ap.args.end = fuse_aio_complete_req; ia->ap.args.end = fuse_aio_complete_req;
ia->ap.args.may_block = io->should_dirty; ia->ap.args.may_block = io->should_dirty;
err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL); err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL);
if (err) if (err)
fuse_aio_complete_req(fc, &ia->ap.args, err); fuse_aio_complete_req(fm, &ia->ap.args, err);
return num_bytes; return num_bytes;
} }
...@@ -741,18 +755,18 @@ static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count, ...@@ -741,18 +755,18 @@ static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count,
{ {
struct file *file = ia->io->iocb->ki_filp; struct file *file = ia->io->iocb->ki_filp;
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc; struct fuse_mount *fm = ff->fm;
fuse_read_args_fill(ia, file, pos, count, FUSE_READ); fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
if (owner != NULL) { if (owner != NULL) {
ia->read.in.read_flags |= FUSE_READ_LOCKOWNER; ia->read.in.read_flags |= FUSE_READ_LOCKOWNER;
ia->read.in.lock_owner = fuse_lock_owner_id(fc, owner); ia->read.in.lock_owner = fuse_lock_owner_id(fm->fc, owner);
} }
if (ia->io->async) if (ia->io->async)
return fuse_async_req_send(fc, ia, count); return fuse_async_req_send(fm, ia, count);
return fuse_simple_request(fc, &ia->ap.args); return fuse_simple_request(fm, &ia->ap.args);
} }
static void fuse_read_update_size(struct inode *inode, loff_t size, static void fuse_read_update_size(struct inode *inode, loff_t size,
...@@ -798,7 +812,7 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read, ...@@ -798,7 +812,7 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
static int fuse_do_readpage(struct file *file, struct page *page) static int fuse_do_readpage(struct file *file, struct page *page)
{ {
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
loff_t pos = page_offset(page); loff_t pos = page_offset(page);
struct fuse_page_desc desc = { .length = PAGE_SIZE }; struct fuse_page_desc desc = { .length = PAGE_SIZE };
struct fuse_io_args ia = { struct fuse_io_args ia = {
...@@ -818,14 +832,14 @@ static int fuse_do_readpage(struct file *file, struct page *page) ...@@ -818,14 +832,14 @@ static int fuse_do_readpage(struct file *file, struct page *page)
*/ */
fuse_wait_on_page_writeback(inode, page->index); fuse_wait_on_page_writeback(inode, page->index);
attr_ver = fuse_get_attr_version(fc); attr_ver = fuse_get_attr_version(fm->fc);
/* Don't overflow end offset */ /* Don't overflow end offset */
if (pos + (desc.length - 1) == LLONG_MAX) if (pos + (desc.length - 1) == LLONG_MAX)
desc.length--; desc.length--;
fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ); fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ);
res = fuse_simple_request(fc, &ia.ap.args); res = fuse_simple_request(fm, &ia.ap.args);
if (res < 0) if (res < 0)
return res; return res;
/* /*
...@@ -855,7 +869,7 @@ static int fuse_readpage(struct file *file, struct page *page) ...@@ -855,7 +869,7 @@ static int fuse_readpage(struct file *file, struct page *page)
return err; return err;
} }
static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_args *args, static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
int err) int err)
{ {
int i; int i;
...@@ -899,7 +913,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_args *args, ...@@ -899,7 +913,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_args *args,
static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
{ {
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc; struct fuse_mount *fm = ff->fm;
struct fuse_args_pages *ap = &ia->ap; struct fuse_args_pages *ap = &ia->ap;
loff_t pos = page_offset(ap->pages[0]); loff_t pos = page_offset(ap->pages[0]);
size_t count = ap->num_pages << PAGE_SHIFT; size_t count = ap->num_pages << PAGE_SHIFT;
...@@ -918,18 +932,18 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) ...@@ -918,18 +932,18 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
WARN_ON((loff_t) (pos + count) < 0); WARN_ON((loff_t) (pos + count) < 0);
fuse_read_args_fill(ia, file, pos, count, FUSE_READ); fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
ia->read.attr_ver = fuse_get_attr_version(fc); ia->read.attr_ver = fuse_get_attr_version(fm->fc);
if (fc->async_read) { if (fm->fc->async_read) {
ia->ff = fuse_file_get(ff); ia->ff = fuse_file_get(ff);
ap->args.end = fuse_readpages_end; ap->args.end = fuse_readpages_end;
err = fuse_simple_background(fc, &ap->args, GFP_KERNEL); err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
if (!err) if (!err)
return; return;
} else { } else {
res = fuse_simple_request(fc, &ap->args); res = fuse_simple_request(fm, &ap->args);
err = res < 0 ? res : 0; err = res < 0 ? res : 0;
} }
fuse_readpages_end(fc, &ap->args, err); fuse_readpages_end(fm, &ap->args, err);
} }
static void fuse_readahead(struct readahead_control *rac) static void fuse_readahead(struct readahead_control *rac)
...@@ -1000,7 +1014,7 @@ static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff, ...@@ -1000,7 +1014,7 @@ static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
args->opcode = FUSE_WRITE; args->opcode = FUSE_WRITE;
args->nodeid = ff->nodeid; args->nodeid = ff->nodeid;
args->in_numargs = 2; args->in_numargs = 2;
if (ff->fc->minor < 9) if (ff->fm->fc->minor < 9)
args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
else else
args->in_args[0].size = sizeof(ia->write.in); args->in_args[0].size = sizeof(ia->write.in);
...@@ -1029,7 +1043,7 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, ...@@ -1029,7 +1043,7 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
struct kiocb *iocb = ia->io->iocb; struct kiocb *iocb = ia->io->iocb;
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc; struct fuse_mount *fm = ff->fm;
struct fuse_write_in *inarg = &ia->write.in; struct fuse_write_in *inarg = &ia->write.in;
ssize_t err; ssize_t err;
...@@ -1037,13 +1051,13 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, ...@@ -1037,13 +1051,13 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
inarg->flags = fuse_write_flags(iocb); inarg->flags = fuse_write_flags(iocb);
if (owner != NULL) { if (owner != NULL) {
inarg->write_flags |= FUSE_WRITE_LOCKOWNER; inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
inarg->lock_owner = fuse_lock_owner_id(fc, owner); inarg->lock_owner = fuse_lock_owner_id(fm->fc, owner);
} }
if (ia->io->async) if (ia->io->async)
return fuse_async_req_send(fc, ia, count); return fuse_async_req_send(fm, ia, count);
err = fuse_simple_request(fc, &ia->ap.args); err = fuse_simple_request(fm, &ia->ap.args);
if (!err && ia->write.out.size > count) if (!err && ia->write.out.size > count)
err = -EIO; err = -EIO;
...@@ -1074,7 +1088,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia, ...@@ -1074,7 +1088,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
struct fuse_args_pages *ap = &ia->ap; struct fuse_args_pages *ap = &ia->ap;
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc; struct fuse_mount *fm = ff->fm;
unsigned int offset, i; unsigned int offset, i;
int err; int err;
...@@ -1084,7 +1098,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia, ...@@ -1084,7 +1098,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
fuse_write_args_fill(ia, ff, pos, count); fuse_write_args_fill(ia, ff, pos, count);
ia->write.in.flags = fuse_write_flags(iocb); ia->write.in.flags = fuse_write_flags(iocb);
err = fuse_simple_request(fc, &ap->args); err = fuse_simple_request(fm, &ap->args);
if (!err && ia->write.out.size > count) if (!err && ia->write.out.size > count)
err = -EIO; err = -EIO;
...@@ -1399,7 +1413,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, ...@@ -1399,7 +1413,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
struct file *file = io->iocb->ki_filp; struct file *file = io->iocb->ki_filp;
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc; struct fuse_conn *fc = ff->fm->fc;
size_t nmax = write ? fc->max_write : fc->max_read; size_t nmax = write ? fc->max_write : fc->max_read;
loff_t pos = *ppos; loff_t pos = *ppos;
size_t count = iov_iter_count(iter); size_t count = iov_iter_count(iter);
...@@ -1539,10 +1553,14 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ...@@ -1539,10 +1553,14 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct inode *inode = file_inode(file);
if (is_bad_inode(file_inode(file))) if (is_bad_inode(inode))
return -EIO; return -EIO;
if (FUSE_IS_DAX(inode))
return fuse_dax_read_iter(iocb, to);
if (!(ff->open_flags & FOPEN_DIRECT_IO)) if (!(ff->open_flags & FOPEN_DIRECT_IO))
return fuse_cache_read_iter(iocb, to); return fuse_cache_read_iter(iocb, to);
else else
...@@ -1553,10 +1571,14 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1553,10 +1571,14 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct inode *inode = file_inode(file);
if (is_bad_inode(file_inode(file))) if (is_bad_inode(inode))
return -EIO; return -EIO;
if (FUSE_IS_DAX(inode))
return fuse_dax_write_iter(iocb, from);
if (!(ff->open_flags & FOPEN_DIRECT_IO)) if (!(ff->open_flags & FOPEN_DIRECT_IO))
return fuse_cache_write_iter(iocb, from); return fuse_cache_write_iter(iocb, from);
else else
...@@ -1578,7 +1600,7 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa) ...@@ -1578,7 +1600,7 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
kfree(wpa); kfree(wpa);
} }
static void fuse_writepage_finish(struct fuse_conn *fc, static void fuse_writepage_finish(struct fuse_mount *fm,
struct fuse_writepage_args *wpa) struct fuse_writepage_args *wpa)
{ {
struct fuse_args_pages *ap = &wpa->ia.ap; struct fuse_args_pages *ap = &wpa->ia.ap;
...@@ -1596,7 +1618,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, ...@@ -1596,7 +1618,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc,
} }
/* Called under fi->lock, may release and reacquire it */ /* Called under fi->lock, may release and reacquire it */
static void fuse_send_writepage(struct fuse_conn *fc, static void fuse_send_writepage(struct fuse_mount *fm,
struct fuse_writepage_args *wpa, loff_t size) struct fuse_writepage_args *wpa, loff_t size)
__releases(fi->lock) __releases(fi->lock)
__acquires(fi->lock) __acquires(fi->lock)
...@@ -1622,10 +1644,10 @@ __acquires(fi->lock) ...@@ -1622,10 +1644,10 @@ __acquires(fi->lock)
args->force = true; args->force = true;
args->nocreds = true; args->nocreds = true;
err = fuse_simple_background(fc, args, GFP_ATOMIC); err = fuse_simple_background(fm, args, GFP_ATOMIC);
if (err == -ENOMEM) { if (err == -ENOMEM) {
spin_unlock(&fi->lock); spin_unlock(&fi->lock);
err = fuse_simple_background(fc, args, GFP_NOFS | __GFP_NOFAIL); err = fuse_simple_background(fm, args, GFP_NOFS | __GFP_NOFAIL);
spin_lock(&fi->lock); spin_lock(&fi->lock);
} }
...@@ -1638,7 +1660,7 @@ __acquires(fi->lock) ...@@ -1638,7 +1660,7 @@ __acquires(fi->lock)
out_free: out_free:
fi->writectr--; fi->writectr--;
rb_erase(&wpa->writepages_entry, &fi->writepages); rb_erase(&wpa->writepages_entry, &fi->writepages);
fuse_writepage_finish(fc, wpa); fuse_writepage_finish(fm, wpa);
spin_unlock(&fi->lock); spin_unlock(&fi->lock);
/* After fuse_writepage_finish() aux request list is private */ /* After fuse_writepage_finish() aux request list is private */
...@@ -1662,7 +1684,7 @@ void fuse_flush_writepages(struct inode *inode) ...@@ -1662,7 +1684,7 @@ void fuse_flush_writepages(struct inode *inode)
__releases(fi->lock) __releases(fi->lock)
__acquires(fi->lock) __acquires(fi->lock)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
loff_t crop = i_size_read(inode); loff_t crop = i_size_read(inode);
struct fuse_writepage_args *wpa; struct fuse_writepage_args *wpa;
...@@ -1671,7 +1693,7 @@ __acquires(fi->lock) ...@@ -1671,7 +1693,7 @@ __acquires(fi->lock)
wpa = list_entry(fi->queued_writes.next, wpa = list_entry(fi->queued_writes.next,
struct fuse_writepage_args, queue_entry); struct fuse_writepage_args, queue_entry);
list_del_init(&wpa->queue_entry); list_del_init(&wpa->queue_entry);
fuse_send_writepage(fc, wpa, crop); fuse_send_writepage(fm, wpa, crop);
} }
} }
...@@ -1712,7 +1734,7 @@ static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa) ...@@ -1712,7 +1734,7 @@ static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
WARN_ON(fuse_insert_writeback(root, wpa)); WARN_ON(fuse_insert_writeback(root, wpa));
} }
static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
int error) int error)
{ {
struct fuse_writepage_args *wpa = struct fuse_writepage_args *wpa =
...@@ -1724,7 +1746,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, ...@@ -1724,7 +1746,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
spin_lock(&fi->lock); spin_lock(&fi->lock);
rb_erase(&wpa->writepages_entry, &fi->writepages); rb_erase(&wpa->writepages_entry, &fi->writepages);
while (wpa->next) { while (wpa->next) {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_write_in *inarg = &wpa->ia.write.in; struct fuse_write_in *inarg = &wpa->ia.write.in;
struct fuse_writepage_args *next = wpa->next; struct fuse_writepage_args *next = wpa->next;
...@@ -1756,10 +1778,10 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, ...@@ -1756,10 +1778,10 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
* no invocations of fuse_writepage_end() while we're in * no invocations of fuse_writepage_end() while we're in
* fuse_set_nowrite..fuse_release_nowrite section. * fuse_set_nowrite..fuse_release_nowrite section.
*/ */
fuse_send_writepage(fc, next, inarg->offset + inarg->size); fuse_send_writepage(fm, next, inarg->offset + inarg->size);
} }
fi->writectr--; fi->writectr--;
fuse_writepage_finish(fc, wpa); fuse_writepage_finish(fm, wpa);
spin_unlock(&fi->lock); spin_unlock(&fi->lock);
fuse_writepage_free(wpa); fuse_writepage_free(wpa);
} }
...@@ -2317,6 +2339,10 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -2317,6 +2339,10 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{ {
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
/* DAX mmap is superior to direct_io mmap */
if (FUSE_IS_DAX(file_inode(file)))
return fuse_dax_mmap(file, vma);
if (ff->open_flags & FOPEN_DIRECT_IO) { if (ff->open_flags & FOPEN_DIRECT_IO) {
/* Can't provide the coherency needed for MAP_SHARED */ /* Can't provide the coherency needed for MAP_SHARED */
if (vma->vm_flags & VM_MAYSHARE) if (vma->vm_flags & VM_MAYSHARE)
...@@ -2395,7 +2421,7 @@ static void fuse_lk_fill(struct fuse_args *args, struct file *file, ...@@ -2395,7 +2421,7 @@ static void fuse_lk_fill(struct fuse_args *args, struct file *file,
static int fuse_getlk(struct file *file, struct file_lock *fl) static int fuse_getlk(struct file *file, struct file_lock *fl)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_lk_in inarg; struct fuse_lk_in inarg;
struct fuse_lk_out outarg; struct fuse_lk_out outarg;
...@@ -2405,9 +2431,9 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) ...@@ -2405,9 +2431,9 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
args.out_numargs = 1; args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg); args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg; args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (!err) if (!err)
err = convert_fuse_file_lock(fc, &outarg.lk, fl); err = convert_fuse_file_lock(fm->fc, &outarg.lk, fl);
return err; return err;
} }
...@@ -2415,12 +2441,12 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) ...@@ -2415,12 +2441,12 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_lk_in inarg; struct fuse_lk_in inarg;
int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL; struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL;
pid_t pid_nr = pid_nr_ns(pid, fc->pid_ns); pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns);
int err; int err;
if (fl->fl_lmops && fl->fl_lmops->lm_grant) { if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
...@@ -2433,7 +2459,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) ...@@ -2433,7 +2459,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
return 0; return 0;
fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg); fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
/* locking is restartable */ /* locking is restartable */
if (err == -EINTR) if (err == -EINTR)
...@@ -2487,13 +2513,13 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) ...@@ -2487,13 +2513,13 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
static sector_t fuse_bmap(struct address_space *mapping, sector_t block) static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
{ {
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_bmap_in inarg; struct fuse_bmap_in inarg;
struct fuse_bmap_out outarg; struct fuse_bmap_out outarg;
int err; int err;
if (!inode->i_sb->s_bdev || fc->no_bmap) if (!inode->i_sb->s_bdev || fm->fc->no_bmap)
return 0; return 0;
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
...@@ -2507,9 +2533,9 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) ...@@ -2507,9 +2533,9 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
args.out_numargs = 1; args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg); args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg; args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) if (err == -ENOSYS)
fc->no_bmap = 1; fm->fc->no_bmap = 1;
return err ? 0 : outarg.block; return err ? 0 : outarg.block;
} }
...@@ -2517,7 +2543,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) ...@@ -2517,7 +2543,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
{ {
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_lseek_in inarg = { struct fuse_lseek_in inarg = {
...@@ -2528,7 +2554,7 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) ...@@ -2528,7 +2554,7 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
struct fuse_lseek_out outarg; struct fuse_lseek_out outarg;
int err; int err;
if (fc->no_lseek) if (fm->fc->no_lseek)
goto fallback; goto fallback;
args.opcode = FUSE_LSEEK; args.opcode = FUSE_LSEEK;
...@@ -2539,10 +2565,10 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) ...@@ -2539,10 +2565,10 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
args.out_numargs = 1; args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg); args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg; args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (err) { if (err) {
if (err == -ENOSYS) { if (err == -ENOSYS) {
fc->no_lseek = 1; fm->fc->no_lseek = 1;
goto fallback; goto fallback;
} }
return err; return err;
...@@ -2728,7 +2754,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2728,7 +2754,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
unsigned int flags) unsigned int flags)
{ {
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc; struct fuse_mount *fm = ff->fm;
struct fuse_ioctl_in inarg = { struct fuse_ioctl_in inarg = {
.fh = ff->fh, .fh = ff->fh,
.cmd = cmd, .cmd = cmd,
...@@ -2761,12 +2787,12 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2761,12 +2787,12 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM; err = -ENOMEM;
ap.pages = fuse_pages_alloc(fc->max_pages, GFP_KERNEL, &ap.descs); ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
if (!ap.pages || !iov_page) if (!ap.pages || !iov_page)
goto out; goto out;
fuse_page_descs_length_init(ap.descs, 0, fc->max_pages); fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages);
/* /*
* If restricted, initialize IO parameters as encoded in @cmd. * If restricted, initialize IO parameters as encoded in @cmd.
...@@ -2811,7 +2837,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2811,7 +2837,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
/* make sure there are enough buffer pages and init request with them */ /* make sure there are enough buffer pages and init request with them */
err = -ENOMEM; err = -ENOMEM;
if (max_pages > fc->max_pages) if (max_pages > fm->fc->max_pages)
goto out; goto out;
while (ap.num_pages < max_pages) { while (ap.num_pages < max_pages) {
ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
...@@ -2848,7 +2874,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2848,7 +2874,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
ap.args.out_pages = true; ap.args.out_pages = true;
ap.args.out_argvar = true; ap.args.out_argvar = true;
transferred = fuse_simple_request(fc, &ap.args); transferred = fuse_simple_request(fm, &ap.args);
err = transferred; err = transferred;
if (transferred < 0) if (transferred < 0)
goto out; goto out;
...@@ -2876,7 +2902,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2876,7 +2902,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
goto out; goto out;
vaddr = kmap_atomic(ap.pages[0]); vaddr = kmap_atomic(ap.pages[0]);
err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr,
transferred, in_iovs + out_iovs, transferred, in_iovs + out_iovs,
(flags & FUSE_IOCTL_COMPAT) != 0); (flags & FUSE_IOCTL_COMPAT) != 0);
kunmap_atomic(vaddr); kunmap_atomic(vaddr);
...@@ -2886,11 +2912,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ...@@ -2886,11 +2912,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iov = iov_page; in_iov = iov_page;
out_iov = in_iov + in_iovs; out_iov = in_iov + in_iovs;
err = fuse_verify_ioctl_iov(fc, in_iov, in_iovs); err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs);
if (err) if (err)
goto out; goto out;
err = fuse_verify_ioctl_iov(fc, out_iov, out_iovs); err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs);
if (err) if (err)
goto out; goto out;
...@@ -3000,13 +3026,13 @@ static void fuse_register_polled_file(struct fuse_conn *fc, ...@@ -3000,13 +3026,13 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
__poll_t fuse_file_poll(struct file *file, poll_table *wait) __poll_t fuse_file_poll(struct file *file, poll_table *wait)
{ {
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc; struct fuse_mount *fm = ff->fm;
struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
struct fuse_poll_out outarg; struct fuse_poll_out outarg;
FUSE_ARGS(args); FUSE_ARGS(args);
int err; int err;
if (fc->no_poll) if (fm->fc->no_poll)
return DEFAULT_POLLMASK; return DEFAULT_POLLMASK;
poll_wait(file, &ff->poll_wait, wait); poll_wait(file, &ff->poll_wait, wait);
...@@ -3018,7 +3044,7 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait) ...@@ -3018,7 +3044,7 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait)
*/ */
if (waitqueue_active(&ff->poll_wait)) { if (waitqueue_active(&ff->poll_wait)) {
inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY;
fuse_register_polled_file(fc, ff); fuse_register_polled_file(fm->fc, ff);
} }
args.opcode = FUSE_POLL; args.opcode = FUSE_POLL;
...@@ -3029,12 +3055,12 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait) ...@@ -3029,12 +3055,12 @@ __poll_t fuse_file_poll(struct file *file, poll_table *wait)
args.out_numargs = 1; args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg); args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg; args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (!err) if (!err)
return demangle_poll(outarg.revents); return demangle_poll(outarg.revents);
if (err == -ENOSYS) { if (err == -ENOSYS) {
fc->no_poll = 1; fm->fc->no_poll = 1;
return DEFAULT_POLLMASK; return DEFAULT_POLLMASK;
} }
return EPOLLERR; return EPOLLERR;
...@@ -3120,13 +3146,13 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ...@@ -3120,13 +3146,13 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
* By default, we want to optimize all I/Os with async request * By default, we want to optimize all I/Os with async request
* submission to the client filesystem if supported. * submission to the client filesystem if supported.
*/ */
io->async = ff->fc->async_dio; io->async = ff->fm->fc->async_dio;
io->iocb = iocb; io->iocb = iocb;
io->blocking = is_sync_kiocb(iocb); io->blocking = is_sync_kiocb(iocb);
/* optimization for short read */ /* optimization for short read */
if (io->async && !io->write && offset + count > i_size) { if (io->async && !io->write && offset + count > i_size) {
iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset)); iov_iter_truncate(iter, fuse_round_up(ff->fm->fc, i_size - offset));
shortened = count - iov_iter_count(iter); shortened = count - iov_iter_count(iter);
count -= shortened; count -= shortened;
} }
...@@ -3196,7 +3222,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, ...@@ -3196,7 +3222,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
struct fuse_file *ff = file->private_data; struct fuse_file *ff = file->private_data;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = ff->fc; struct fuse_mount *fm = ff->fm;
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_fallocate_in inarg = { struct fuse_fallocate_in inarg = {
.fh = ff->fh, .fh = ff->fh,
...@@ -3208,14 +3234,23 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, ...@@ -3208,14 +3234,23 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
(mode & FALLOC_FL_PUNCH_HOLE); (mode & FALLOC_FL_PUNCH_HOLE);
bool block_faults = FUSE_IS_DAX(inode) && lock_inode;
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (fc->no_fallocate) if (fm->fc->no_fallocate)
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (lock_inode) { if (lock_inode) {
inode_lock(inode); inode_lock(inode);
if (block_faults) {
down_write(&fi->i_mmap_sem);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
goto out;
}
if (mode & FALLOC_FL_PUNCH_HOLE) { if (mode & FALLOC_FL_PUNCH_HOLE) {
loff_t endbyte = offset + length - 1; loff_t endbyte = offset + length - 1;
...@@ -3240,9 +3275,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, ...@@ -3240,9 +3275,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
args.in_numargs = 1; args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg); args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg; args.in_args[0].value = &inarg;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) { if (err == -ENOSYS) {
fc->no_fallocate = 1; fm->fc->no_fallocate = 1;
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
} }
if (err) if (err)
...@@ -3252,7 +3287,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, ...@@ -3252,7 +3287,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE)) { if (!(mode & FALLOC_FL_KEEP_SIZE)) {
bool changed = fuse_write_update_size(inode, offset + length); bool changed = fuse_write_update_size(inode, offset + length);
if (changed && fc->writeback_cache) if (changed && fm->fc->writeback_cache)
file_update_time(file); file_update_time(file);
} }
...@@ -3265,6 +3300,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, ...@@ -3265,6 +3300,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE)) if (!(mode & FALLOC_FL_KEEP_SIZE))
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
if (block_faults)
up_write(&fi->i_mmap_sem);
if (lock_inode) if (lock_inode)
inode_unlock(inode); inode_unlock(inode);
...@@ -3280,7 +3318,8 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, ...@@ -3280,7 +3318,8 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
struct inode *inode_in = file_inode(file_in); struct inode *inode_in = file_inode(file_in);
struct inode *inode_out = file_inode(file_out); struct inode *inode_out = file_inode(file_out);
struct fuse_inode *fi_out = get_fuse_inode(inode_out); struct fuse_inode *fi_out = get_fuse_inode(inode_out);
struct fuse_conn *fc = ff_in->fc; struct fuse_mount *fm = ff_in->fm;
struct fuse_conn *fc = fm->fc;
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_copy_file_range_in inarg = { struct fuse_copy_file_range_in inarg = {
.fh_in = ff_in->fh, .fh_in = ff_in->fh,
...@@ -3349,7 +3388,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, ...@@ -3349,7 +3388,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
args.out_numargs = 1; args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg); args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg; args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) { if (err == -ENOSYS) {
fc->no_copy_file_range = 1; fc->no_copy_file_range = 1;
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
...@@ -3404,6 +3443,7 @@ static const struct file_operations fuse_file_operations = { ...@@ -3404,6 +3443,7 @@ static const struct file_operations fuse_file_operations = {
.release = fuse_release, .release = fuse_release,
.fsync = fuse_fsync, .fsync = fuse_fsync,
.lock = fuse_file_lock, .lock = fuse_file_lock,
.get_unmapped_area = thp_get_unmapped_area,
.flock = fuse_file_flock, .flock = fuse_file_flock,
.splice_read = generic_file_splice_read, .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write, .splice_write = iter_file_splice_write,
...@@ -3439,4 +3479,7 @@ void fuse_init_file_inode(struct inode *inode) ...@@ -3439,4 +3479,7 @@ void fuse_init_file_inode(struct inode *inode)
fi->writectr = 0; fi->writectr = 0;
init_waitqueue_head(&fi->page_waitq); init_waitqueue_head(&fi->page_waitq);
fi->writepages = RB_ROOT; fi->writepages = RB_ROOT;
if (IS_ENABLED(CONFIG_FUSE_DAX))
fuse_dax_inode_init(inode);
} }
...@@ -148,6 +148,20 @@ struct fuse_inode { ...@@ -148,6 +148,20 @@ struct fuse_inode {
/** Lock to protect write related fields */ /** Lock to protect write related fields */
spinlock_t lock; spinlock_t lock;
/**
* Can't take inode lock in fault path (leads to circular dependency).
* Introduce another semaphore which can be taken in fault path and
* then other filesystem paths can take this to block faults.
*/
struct rw_semaphore i_mmap_sem;
#ifdef CONFIG_FUSE_DAX
/*
* Dax specific inode data
*/
struct fuse_inode_dax *dax;
#endif
}; };
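The i_mmap_sem comment above is terse, so here is a hedged sketch (not taken from fs/fuse/) of how a truncating path is expected to use the semaphore against the DAX fault path: writers hold it exclusively while mappings are torn down, the fault handler holds it shared. The helper name example_dax_truncate_begin() is hypothetical; fuse_dax_break_layouts() and get_fuse_inode() are the real interfaces declared in this header.

/* Illustrative only -- a plausible caller, not the upstream code. */
static int example_dax_truncate_begin(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	int err;

	down_write(&fi->i_mmap_sem);		/* blocks new DAX faults */
	err = fuse_dax_break_layouts(inode, 0, 0); /* waits out existing maps */
	if (err)
		up_write(&fi->i_mmap_sem);	/* on success the caller drops it
						 * after the size change is done */
	return err;
}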
/** FUSE inode state bits */ /** FUSE inode state bits */
...@@ -161,12 +175,13 @@ enum { ...@@ -161,12 +175,13 @@ enum {
}; };
struct fuse_conn; struct fuse_conn;
struct fuse_mount;
struct fuse_release_args; struct fuse_release_args;
/** FUSE specific file data */ /** FUSE specific file data */
struct fuse_file { struct fuse_file {
	/** Fuse connection for this file */	/** Fuse mount for this file */
struct fuse_conn *fc; struct fuse_mount *fm;
/* Argument space reserved for release */ /* Argument space reserved for release */
struct fuse_release_args *release_args; struct fuse_release_args *release_args;
...@@ -252,7 +267,7 @@ struct fuse_args { ...@@ -252,7 +267,7 @@ struct fuse_args {
bool may_block:1; bool may_block:1;
struct fuse_in_arg in_args[3]; struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2]; struct fuse_arg out_args[2];
void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error); void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
}; };
struct fuse_args_pages { struct fuse_args_pages {
...@@ -360,6 +375,9 @@ struct fuse_req { ...@@ -360,6 +375,9 @@ struct fuse_req {
/** virtio-fs's physically contiguous buffer for in and out args */ /** virtio-fs's physically contiguous buffer for in and out args */
void *argbuf; void *argbuf;
#endif #endif
/** fuse_mount this request belongs to */
struct fuse_mount *fm;
}; };
struct fuse_iqueue; struct fuse_iqueue;
...@@ -482,11 +500,15 @@ struct fuse_fs_context { ...@@ -482,11 +500,15 @@ struct fuse_fs_context {
bool destroy:1; bool destroy:1;
bool no_control:1; bool no_control:1;
bool no_force_umount:1; bool no_force_umount:1;
bool no_mount_options:1; bool legacy_opts_show:1;
bool dax:1;
unsigned int max_read; unsigned int max_read;
unsigned int blksize; unsigned int blksize;
const char *subtype; const char *subtype;
/* DAX device, may be NULL */
struct dax_device *dax_dev;
/* fuse_dev pointer to fill in, should contain NULL on entry */ /* fuse_dev pointer to fill in, should contain NULL on entry */
void **fudptr; void **fudptr;
}; };
...@@ -494,9 +516,9 @@ struct fuse_fs_context { ...@@ -494,9 +516,9 @@ struct fuse_fs_context {
/** /**
* A Fuse connection. * A Fuse connection.
* *
* This structure is created, when the filesystem is mounted, and is * This structure is created, when the root filesystem is mounted, and
* destroyed, when the client device is closed and the filesystem is * is destroyed, when the client device is closed and the last
* unmounted. * fuse_mount is destroyed.
*/ */
struct fuse_conn { struct fuse_conn {
	/** Lock protecting accesses to members of this structure */	/** Lock protecting accesses to members of this structure */
...@@ -610,6 +632,9 @@ struct fuse_conn { ...@@ -610,6 +632,9 @@ struct fuse_conn {
/** cache READLINK responses in page cache */ /** cache READLINK responses in page cache */
unsigned cache_symlinks:1; unsigned cache_symlinks:1;
/* show legacy mount options */
unsigned int legacy_opts_show:1;
/* /*
* The following bitfields are only for optimization purposes * The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction * and hence races in setting them will not cause malfunction
...@@ -717,8 +742,8 @@ struct fuse_conn { ...@@ -717,8 +742,8 @@ struct fuse_conn {
/** Do not allow MNT_FORCE umount */ /** Do not allow MNT_FORCE umount */
unsigned int no_force_umount:1; unsigned int no_force_umount:1;
/* Do not show mount options */ /* Auto-mount submounts announced by the server */
unsigned int no_mount_options:1; unsigned int auto_submounts:1;
/** The number of requests waiting for completion */ /** The number of requests waiting for completion */
atomic_t num_waiting; atomic_t num_waiting;
...@@ -726,10 +751,10 @@ struct fuse_conn { ...@@ -726,10 +751,10 @@ struct fuse_conn {
/** Negotiated minor version */ /** Negotiated minor version */
unsigned minor; unsigned minor;
/** Entry on the fuse_conn_list */ /** Entry on the fuse_mount_list */
struct list_head entry; struct list_head entry;
/** Device ID from super block */ /** Device ID from the root super block */
dev_t dev; dev_t dev;
/** Dentries in the control filesystem */ /** Dentries in the control filesystem */
...@@ -747,24 +772,70 @@ struct fuse_conn { ...@@ -747,24 +772,70 @@ struct fuse_conn {
/** Called on final put */ /** Called on final put */
void (*release)(struct fuse_conn *); void (*release)(struct fuse_conn *);
/** Super block for this connection. */ /**
struct super_block *sb; * Read/write semaphore to hold when accessing the sb of any
* fuse_mount belonging to this connection
/** Read/write semaphore to hold when accessing sb. */ */
struct rw_semaphore killsb; struct rw_semaphore killsb;
/** List of device instances belonging to this connection */ /** List of device instances belonging to this connection */
struct list_head devices; struct list_head devices;
#ifdef CONFIG_FUSE_DAX
/* Dax specific conn data, non-NULL if DAX is enabled */
struct fuse_conn_dax *dax;
#endif
/** List of filesystems using this connection */
struct list_head mounts;
}; };
static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) /*
* Represents a mounted filesystem, potentially a submount.
*
* This object allows sharing a fuse_conn between separate mounts to
* allow submounts with dedicated superblocks and thus separate device
* IDs.
*/
struct fuse_mount {
/* Underlying (potentially shared) connection to the FUSE server */
struct fuse_conn *fc;
/* Refcount */
refcount_t count;
/*
* Super block for this connection (fc->killsb must be held when
* accessing this).
*/
struct super_block *sb;
/* Entry on fc->mounts */
struct list_head fc_entry;
};
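As the comment above says, several fuse_mount instances can share one fuse_conn; the sketch below shows one plausible way a submount could be attached to an existing connection. It is illustrative only: example_attach_submount() is a hypothetical helper, and taking fc->killsb around the fc->mounts update is an assumption made for the sketch; fuse_conn_get() and the structure members are the real interfaces from this header.

/* Sketch only: wire a new superblock to an already-running connection. */
static struct fuse_mount *example_attach_submount(struct super_block *sb,
						  struct fuse_mount *parent)
{
	struct fuse_mount *fm;

	fm = kzalloc(sizeof(*fm), GFP_KERNEL);
	if (!fm)
		return NULL;

	fm->fc = fuse_conn_get(parent->fc);	/* share the parent's fuse_conn */
	refcount_set(&fm->count, 1);
	fm->sb = sb;
	sb->s_fs_info = fm;			/* get_fuse_mount_super() finds it here */

	down_write(&fm->fc->killsb);		/* assumption: serialize against umount */
	list_add_tail(&fm->fc_entry, &fm->fc->mounts);
	up_write(&fm->fc->killsb);

	return fm;
}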
static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb)
{ {
return sb->s_fs_info; return sb->s_fs_info;
} }
static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
{
struct fuse_mount *fm = get_fuse_mount_super(sb);
return fm ? fm->fc : NULL;
}
static inline struct fuse_mount *get_fuse_mount(struct inode *inode)
{
return get_fuse_mount_super(inode->i_sb);
}
static inline struct fuse_conn *get_fuse_conn(struct inode *inode) static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
{ {
return get_fuse_conn_super(inode->i_sb); struct fuse_mount *fm = get_fuse_mount(inode);
return fm ? fm->fc : NULL;
} }
static inline struct fuse_inode *get_fuse_inode(struct inode *inode) static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
...@@ -793,11 +864,6 @@ extern const struct file_operations fuse_dev_operations; ...@@ -793,11 +864,6 @@ extern const struct file_operations fuse_dev_operations;
extern const struct dentry_operations fuse_dentry_operations; extern const struct dentry_operations fuse_dentry_operations;
extern const struct dentry_operations fuse_root_dentry_operations; extern const struct dentry_operations fuse_root_dentry_operations;
/**
* Inode to nodeid comparison.
*/
int fuse_inode_eq(struct inode *inode, void *_nodeidp);
/** /**
* Get a filled in inode * Get a filled in inode
*/ */
...@@ -848,7 +914,7 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos, ...@@ -848,7 +914,7 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
*/ */
int fuse_open_common(struct inode *inode, struct file *file, bool isdir); int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); struct fuse_file *fuse_file_alloc(struct fuse_mount *fm);
void fuse_file_free(struct fuse_file *ff); void fuse_file_free(struct fuse_file *ff);
void fuse_finish_open(struct inode *inode, struct file *file); void fuse_finish_open(struct inode *inode, struct file *file);
...@@ -916,14 +982,14 @@ void __exit fuse_ctl_cleanup(void); ...@@ -916,14 +982,14 @@ void __exit fuse_ctl_cleanup(void);
/** /**
* Simple request sending that does request allocation and freeing * Simple request sending that does request allocation and freeing
*/ */
ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args); ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args);
int fuse_simple_background(struct fuse_conn *fc, struct fuse_args *args, int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
gfp_t gfp_flags); gfp_t gfp_flags);
/** /**
* End a finished request * End a finished request
*/ */
void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req); void fuse_request_end(struct fuse_req *req);
/* Abort all requests */ /* Abort all requests */
void fuse_abort_conn(struct fuse_conn *fc); void fuse_abort_conn(struct fuse_conn *fc);
...@@ -949,7 +1015,8 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); ...@@ -949,7 +1015,8 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
/** /**
* Initialize fuse_conn * Initialize fuse_conn
*/ */
void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
struct user_namespace *user_ns,
const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv); const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv);
/** /**
...@@ -957,11 +1024,21 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, ...@@ -957,11 +1024,21 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
*/ */
void fuse_conn_put(struct fuse_conn *fc); void fuse_conn_put(struct fuse_conn *fc);
/**
* Acquire reference to fuse_mount
*/
struct fuse_mount *fuse_mount_get(struct fuse_mount *fm);
/**
* Release reference to fuse_mount
*/
void fuse_mount_put(struct fuse_mount *fm);
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc);
struct fuse_dev *fuse_dev_alloc(void); struct fuse_dev *fuse_dev_alloc(void);
void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc);
void fuse_dev_free(struct fuse_dev *fud); void fuse_dev_free(struct fuse_dev *fud);
void fuse_send_init(struct fuse_conn *fc); void fuse_send_init(struct fuse_mount *fm);
/** /**
* Fill in superblock and initialize fuse connection * Fill in superblock and initialize fuse connection
...@@ -970,12 +1047,26 @@ void fuse_send_init(struct fuse_conn *fc); ...@@ -970,12 +1047,26 @@ void fuse_send_init(struct fuse_conn *fc);
*/ */
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx); int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx);
/** /*
* Disassociate fuse connection from superblock and kill the superblock * Fill in superblock for submounts
* @sb: partially-initialized superblock to fill in
* @parent_fi: The fuse_inode of the parent filesystem where this submount is
* mounted
*/
int fuse_fill_super_submount(struct super_block *sb,
struct fuse_inode *parent_fi);
/*
* Remove the mount from the connection
* *
* Calls kill_anon_super(), do not use with bdev mounts. * Returns whether this was the last mount
*/ */
void fuse_kill_sb_anon(struct super_block *sb); bool fuse_mount_remove(struct fuse_mount *fm);
/*
* Shut down the connection (possibly sending DESTROY request).
*/
void fuse_conn_destroy(struct fuse_mount *fm);
/** /**
* Add connection to control filesystem * Add connection to control filesystem
...@@ -1010,10 +1101,20 @@ void fuse_flush_writepages(struct inode *inode); ...@@ -1010,10 +1101,20 @@ void fuse_flush_writepages(struct inode *inode);
void fuse_set_nowrite(struct inode *inode); void fuse_set_nowrite(struct inode *inode);
void fuse_release_nowrite(struct inode *inode); void fuse_release_nowrite(struct inode *inode);
/**
* Scan all fuse_mounts belonging to fc to find the first where
* ilookup5() returns a result. Return that result and the
* respective fuse_mount in *fm (unless fm is NULL).
*
* The caller must hold fc->killsb.
*/
struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
struct fuse_mount **fm);
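The contract above leaves the scan itself to fs/fuse/inode.c; the fragment below is a hedged sketch of the shape such a loop can take (it is not copied from the kernel). It assumes fuse_inode_eq(), the nodeid comparator that this series makes static in inode.c, and the caller holding fc->killsb as the comment requires.

/* Sketch only: walk every mount on the connection until ilookup5() hits. */
static struct inode *example_ilookup(struct fuse_conn *fc, u64 nodeid,
				     struct fuse_mount **fm)
{
	struct fuse_mount *fm_iter;
	struct inode *inode;

	list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
		if (!fm_iter->sb)		/* mount not (or no longer) set up */
			continue;

		inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid);
		if (inode) {
			if (fm)
				*fm = fm_iter;	/* report which mount owns it */
			return inode;
		}
	}
	return NULL;
}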
/** /**
* File-system tells the kernel to invalidate cache for the given node id. * File-system tells the kernel to invalidate cache for the given node id.
*/ */
int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
loff_t offset, loff_t len); loff_t offset, loff_t len);
/** /**
...@@ -1026,10 +1127,10 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, ...@@ -1026,10 +1127,10 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
 * - is a file or an empty directory  * - is a file or an empty directory
* then the dentry is unhashed (d_delete()). * then the dentry is unhashed (d_delete()).
*/ */
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
u64 child_nodeid, struct qstr *name); u64 child_nodeid, struct qstr *name);
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
bool isdir); bool isdir);
/** /**
...@@ -1093,4 +1194,20 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); ...@@ -1093,4 +1194,20 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);
u64 fuse_get_unique(struct fuse_iqueue *fiq); u64 fuse_get_unique(struct fuse_iqueue *fiq);
void fuse_free_conn(struct fuse_conn *fc); void fuse_free_conn(struct fuse_conn *fc);
/* dax.c */
#define FUSE_IS_DAX(inode) (IS_ENABLED(CONFIG_FUSE_DAX) && IS_DAX(inode))
ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma);
int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end);
int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev);
void fuse_dax_conn_free(struct fuse_conn *fc);
bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi);
void fuse_dax_inode_init(struct inode *inode);
void fuse_dax_inode_cleanup(struct inode *inode);
bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment);
void fuse_dax_cancel_work(struct fuse_conn *fc);
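To make the connection-level DAX hooks above easier to place, here is a hedged sketch of when they might be called over a connection's lifetime; the two helper names are hypothetical and the ordering is an assumption, not a statement about the upstream call sites.

/* Sketch only: per-connection DAX setup and teardown. */
static int example_dax_setup(struct fuse_conn *fc, struct fuse_fs_context *ctx)
{
	if (!ctx->dax_dev)			/* no "dax" option, nothing to do */
		return 0;
	return fuse_dax_conn_alloc(fc, ctx->dax_dev);
}

static void example_dax_teardown(struct fuse_conn *fc)
{
	fuse_dax_cancel_work(fc);		/* stop memory-range reclaim first */
	fuse_dax_conn_free(fc);			/* then release fc->dax */
}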
#endif /* _FS_FUSE_I_H */ #endif /* _FS_FUSE_I_H */
...@@ -85,14 +85,22 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) ...@@ -85,14 +85,22 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->orig_ino = 0; fi->orig_ino = 0;
fi->state = 0; fi->state = 0;
mutex_init(&fi->mutex); mutex_init(&fi->mutex);
init_rwsem(&fi->i_mmap_sem);
spin_lock_init(&fi->lock); spin_lock_init(&fi->lock);
fi->forget = fuse_alloc_forget(); fi->forget = fuse_alloc_forget();
if (!fi->forget) { if (!fi->forget)
kmem_cache_free(fuse_inode_cachep, fi); goto out_free;
return NULL;
} if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
goto out_free_forget;
return &fi->inode; return &fi->inode;
out_free_forget:
kfree(fi->forget);
out_free:
kmem_cache_free(fuse_inode_cachep, fi);
return NULL;
} }
static void fuse_free_inode(struct inode *inode) static void fuse_free_inode(struct inode *inode)
...@@ -101,6 +109,9 @@ static void fuse_free_inode(struct inode *inode) ...@@ -101,6 +109,9 @@ static void fuse_free_inode(struct inode *inode)
mutex_destroy(&fi->mutex); mutex_destroy(&fi->mutex);
kfree(fi->forget); kfree(fi->forget);
#ifdef CONFIG_FUSE_DAX
kfree(fi->dax);
#endif
kmem_cache_free(fuse_inode_cachep, fi); kmem_cache_free(fuse_inode_cachep, fi);
} }
...@@ -112,9 +123,15 @@ static void fuse_evict_inode(struct inode *inode) ...@@ -112,9 +123,15 @@ static void fuse_evict_inode(struct inode *inode)
clear_inode(inode); clear_inode(inode);
if (inode->i_sb->s_flags & SB_ACTIVE) { if (inode->i_sb->s_flags & SB_ACTIVE) {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
if (FUSE_IS_DAX(inode))
fuse_dax_inode_cleanup(inode);
if (fi->nlookup) {
fuse_queue_forget(fc, fi->forget, fi->nodeid,
fi->nlookup);
fi->forget = NULL; fi->forget = NULL;
} }
}
if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->write_files));
WARN_ON(!list_empty(&fi->queued_writes)); WARN_ON(!list_empty(&fi->queued_writes));
...@@ -268,7 +285,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) ...@@ -268,7 +285,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
BUG(); BUG();
} }
int fuse_inode_eq(struct inode *inode, void *_nodeidp) static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
{ {
u64 nodeid = *(u64 *) _nodeidp; u64 nodeid = *(u64 *) _nodeidp;
if (get_node_id(inode) == nodeid) if (get_node_id(inode) == nodeid)
...@@ -292,7 +309,26 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, ...@@ -292,7 +309,26 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
struct fuse_inode *fi; struct fuse_inode *fi;
struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_conn *fc = get_fuse_conn_super(sb);
retry: /*
* Auto mount points get their node id from the submount root, which is
* not a unique identifier within this filesystem.
*
* To avoid conflicts, do not place submount points into the inode hash
* table.
*/
if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
S_ISDIR(attr->mode)) {
inode = new_inode(sb);
if (!inode)
return NULL;
fuse_init_inode(inode, attr);
get_fuse_inode(inode)->nodeid = nodeid;
inode->i_flags |= S_AUTOMOUNT;
goto done;
}
retry:
inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
if (!inode) if (!inode)
return NULL; return NULL;
...@@ -310,7 +346,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, ...@@ -310,7 +346,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
iput(inode); iput(inode);
goto retry; goto retry;
} }
done:
fi = get_fuse_inode(inode); fi = get_fuse_inode(inode);
spin_lock(&fi->lock); spin_lock(&fi->lock);
fi->nlookup++; fi->nlookup++;
...@@ -320,16 +356,37 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, ...@@ -320,16 +356,37 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
return inode; return inode;
} }
int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
struct fuse_mount **fm)
{
struct fuse_mount *fm_iter;
struct inode *inode;
WARN_ON(!rwsem_is_locked(&fc->killsb));
list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
if (!fm_iter->sb)
continue;
inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid);
if (inode) {
if (fm)
*fm = fm_iter;
return inode;
}
}
return NULL;
}
int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
loff_t offset, loff_t len) loff_t offset, loff_t len)
{ {
struct fuse_conn *fc = get_fuse_conn_super(sb);
struct fuse_inode *fi; struct fuse_inode *fi;
struct inode *inode; struct inode *inode;
pgoff_t pg_start; pgoff_t pg_start;
pgoff_t pg_end; pgoff_t pg_end;
inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid); inode = fuse_ilookup(fc, nodeid, NULL);
if (!inode) if (!inode)
return -ENOENT; return -ENOENT;
...@@ -379,28 +436,23 @@ static void fuse_umount_begin(struct super_block *sb) ...@@ -379,28 +436,23 @@ static void fuse_umount_begin(struct super_block *sb)
fuse_abort_conn(fc); fuse_abort_conn(fc);
} }
static void fuse_send_destroy(struct fuse_conn *fc) static void fuse_send_destroy(struct fuse_mount *fm)
{ {
if (fc->conn_init) { if (fm->fc->conn_init) {
FUSE_ARGS(args); FUSE_ARGS(args);
args.opcode = FUSE_DESTROY; args.opcode = FUSE_DESTROY;
args.force = true; args.force = true;
args.nocreds = true; args.nocreds = true;
fuse_simple_request(fc, &args); fuse_simple_request(fm, &args);
} }
} }
static void fuse_put_super(struct super_block *sb) static void fuse_put_super(struct super_block *sb)
{ {
struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_mount *fm = get_fuse_mount_super(sb);
mutex_lock(&fuse_mutex);
list_del(&fc->entry);
fuse_ctl_remove_conn(fc);
mutex_unlock(&fuse_mutex);
fuse_conn_put(fc); fuse_mount_put(fm);
} }
static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
...@@ -420,12 +472,12 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr ...@@ -420,12 +472,12 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr
static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
{ {
struct super_block *sb = dentry->d_sb; struct super_block *sb = dentry->d_sb;
struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_mount *fm = get_fuse_mount_super(sb);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_statfs_out outarg; struct fuse_statfs_out outarg;
int err; int err;
if (!fuse_allow_current_process(fc)) { if (!fuse_allow_current_process(fm->fc)) {
buf->f_type = FUSE_SUPER_MAGIC; buf->f_type = FUSE_SUPER_MAGIC;
return 0; return 0;
} }
...@@ -437,7 +489,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -437,7 +489,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
args.out_numargs = 1; args.out_numargs = 1;
args.out_args[0].size = sizeof(outarg); args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg; args.out_args[0].value = &outarg;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (!err) if (!err)
convert_fuse_statfs(buf, &outarg.st); convert_fuse_statfs(buf, &outarg.st);
return err; return err;
...@@ -573,11 +625,11 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) ...@@ -573,11 +625,11 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
struct super_block *sb = root->d_sb; struct super_block *sb = root->d_sb;
struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_conn *fc = get_fuse_conn_super(sb);
if (fc->no_mount_options) if (fc->legacy_opts_show) {
return 0; seq_printf(m, ",user_id=%u",
from_kuid_munged(fc->user_ns, fc->user_id));
seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id)); seq_printf(m, ",group_id=%u",
seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id)); from_kgid_munged(fc->user_ns, fc->group_id));
if (fc->default_permissions) if (fc->default_permissions)
seq_puts(m, ",default_permissions"); seq_puts(m, ",default_permissions");
if (fc->allow_other) if (fc->allow_other)
...@@ -586,6 +638,12 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) ...@@ -586,6 +638,12 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",max_read=%u", fc->max_read); seq_printf(m, ",max_read=%u", fc->max_read);
if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
seq_printf(m, ",blksize=%lu", sb->s_blocksize); seq_printf(m, ",blksize=%lu", sb->s_blocksize);
}
#ifdef CONFIG_FUSE_DAX
if (fc->dax)
seq_puts(m, ",dax");
#endif
return 0; return 0;
} }
...@@ -615,7 +673,8 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq) ...@@ -615,7 +673,8 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq)
fpq->connected = 1; fpq->connected = 1;
} }
void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
struct user_namespace *user_ns,
const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
{ {
memset(fc, 0, sizeof(*fc)); memset(fc, 0, sizeof(*fc));
...@@ -642,6 +701,11 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns, ...@@ -642,6 +701,11 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->user_ns = get_user_ns(user_ns); fc->user_ns = get_user_ns(user_ns);
fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
INIT_LIST_HEAD(&fc->mounts);
list_add(&fm->fc_entry, &fc->mounts);
fm->fc = fc;
refcount_set(&fm->count, 1);
} }
EXPORT_SYMBOL_GPL(fuse_conn_init); EXPORT_SYMBOL_GPL(fuse_conn_init);
...@@ -650,6 +714,8 @@ void fuse_conn_put(struct fuse_conn *fc) ...@@ -650,6 +714,8 @@ void fuse_conn_put(struct fuse_conn *fc)
if (refcount_dec_and_test(&fc->count)) { if (refcount_dec_and_test(&fc->count)) {
struct fuse_iqueue *fiq = &fc->iq; struct fuse_iqueue *fiq = &fc->iq;
if (IS_ENABLED(CONFIG_FUSE_DAX))
fuse_dax_conn_free(fc);
if (fiq->ops->release) if (fiq->ops->release)
fiq->ops->release(fiq); fiq->ops->release(fiq);
put_pid_ns(fc->pid_ns); put_pid_ns(fc->pid_ns);
...@@ -666,6 +732,23 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) ...@@ -666,6 +732,23 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
} }
EXPORT_SYMBOL_GPL(fuse_conn_get); EXPORT_SYMBOL_GPL(fuse_conn_get);
void fuse_mount_put(struct fuse_mount *fm)
{
if (refcount_dec_and_test(&fm->count)) {
if (fm->fc)
fuse_conn_put(fm->fc);
kfree(fm);
}
}
EXPORT_SYMBOL_GPL(fuse_mount_put);
struct fuse_mount *fuse_mount_get(struct fuse_mount *fm)
{
refcount_inc(&fm->count);
return fm;
}
EXPORT_SYMBOL_GPL(fuse_mount_get);
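fuse_mount is the new per-superblock object, refcounted separately from the fuse_conn it points at; the final fuse_mount_put() frees the mount and drops its reference on the connection. A minimal sketch of the pairing, with illustrative variable names:

	struct fuse_mount *fm = fuse_mount_get(get_fuse_mount_super(sb));

	/* ... use fm->fc and fm->sb ... */

	fuse_mount_put(fm);	/* last put frees fm and puts fm->fc */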
static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
{ {
struct fuse_attr attr; struct fuse_attr attr;
...@@ -895,14 +978,16 @@ struct fuse_init_args { ...@@ -895,14 +978,16 @@ struct fuse_init_args {
struct fuse_init_out out; struct fuse_init_out out;
}; };
static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args, static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
int error) int error)
{ {
struct fuse_conn *fc = fm->fc;
struct fuse_init_args *ia = container_of(args, typeof(*ia), args); struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
struct fuse_init_out *arg = &ia->out; struct fuse_init_out *arg = &ia->out;
bool ok = true;
if (error || arg->major != FUSE_KERNEL_VERSION) if (error || arg->major != FUSE_KERNEL_VERSION)
fc->conn_error = 1; ok = false;
else { else {
unsigned long ra_pages; unsigned long ra_pages;
...@@ -950,11 +1035,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args, ...@@ -950,11 +1035,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
if (arg->flags & FUSE_HANDLE_KILLPRIV) if (arg->flags & FUSE_HANDLE_KILLPRIV)
fc->handle_killpriv = 1; fc->handle_killpriv = 1;
if (arg->time_gran && arg->time_gran <= 1000000000) if (arg->time_gran && arg->time_gran <= 1000000000)
fc->sb->s_time_gran = arg->time_gran; fm->sb->s_time_gran = arg->time_gran;
if ((arg->flags & FUSE_POSIX_ACL)) { if ((arg->flags & FUSE_POSIX_ACL)) {
fc->default_permissions = 1; fc->default_permissions = 1;
fc->posix_acl = 1; fc->posix_acl = 1;
fc->sb->s_xattr = fuse_acl_xattr_handlers; fm->sb->s_xattr = fuse_acl_xattr_handlers;
} }
if (arg->flags & FUSE_CACHE_SYMLINKS) if (arg->flags & FUSE_CACHE_SYMLINKS)
fc->cache_symlinks = 1; fc->cache_symlinks = 1;
...@@ -965,14 +1050,19 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args, ...@@ -965,14 +1050,19 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
min_t(unsigned int, FUSE_MAX_MAX_PAGES, min_t(unsigned int, FUSE_MAX_MAX_PAGES,
max_t(unsigned int, arg->max_pages, 1)); max_t(unsigned int, arg->max_pages, 1));
} }
if (IS_ENABLED(CONFIG_FUSE_DAX) &&
arg->flags & FUSE_MAP_ALIGNMENT &&
!fuse_dax_check_alignment(fc, arg->map_alignment)) {
ok = false;
}
} else { } else {
ra_pages = fc->max_read / PAGE_SIZE; ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1; fc->no_lock = 1;
fc->no_flock = 1; fc->no_flock = 1;
} }
fc->sb->s_bdi->ra_pages = fm->sb->s_bdi->ra_pages =
min(fc->sb->s_bdi->ra_pages, ra_pages); min(fm->sb->s_bdi->ra_pages, ra_pages);
fc->minor = arg->minor; fc->minor = arg->minor;
fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
fc->max_write = max_t(unsigned, 4096, fc->max_write); fc->max_write = max_t(unsigned, 4096, fc->max_write);
...@@ -980,11 +1070,16 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args, ...@@ -980,11 +1070,16 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
} }
kfree(ia); kfree(ia);
if (!ok) {
fc->conn_init = 0;
fc->conn_error = 1;
}
fuse_set_initialized(fc); fuse_set_initialized(fc);
wake_up_all(&fc->blocked_waitq); wake_up_all(&fc->blocked_waitq);
} }
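In the FUSE_MAP_ALIGNMENT branch above, map_alignment carries log2 of the byte alignment the device needs for dax mapping offsets (see the fuse.h comment further down). A worked example, assuming a device that only needs page-aligned offsets:

	arg->map_alignment == 12  ->  required alignment = 1 << 12 = 4096 bytes

If fuse_dax_check_alignment() cannot guarantee that alignment with the dax range granularity it uses, ok becomes false and the connection is failed via conn_error, just like a protocol version mismatch.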
void fuse_send_init(struct fuse_conn *fc) void fuse_send_init(struct fuse_mount *fm)
{ {
struct fuse_init_args *ia; struct fuse_init_args *ia;
...@@ -992,7 +1087,7 @@ void fuse_send_init(struct fuse_conn *fc) ...@@ -992,7 +1087,7 @@ void fuse_send_init(struct fuse_conn *fc)
ia->in.major = FUSE_KERNEL_VERSION; ia->in.major = FUSE_KERNEL_VERSION;
ia->in.minor = FUSE_KERNEL_MINOR_VERSION; ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
ia->in.max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE; ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
ia->in.flags |= ia->in.flags |=
FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
...@@ -1003,6 +1098,13 @@ void fuse_send_init(struct fuse_conn *fc) ...@@ -1003,6 +1098,13 @@ void fuse_send_init(struct fuse_conn *fc)
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA; FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
#ifdef CONFIG_FUSE_DAX
if (fm->fc->dax)
ia->in.flags |= FUSE_MAP_ALIGNMENT;
#endif
if (fm->fc->auto_submounts)
ia->in.flags |= FUSE_SUBMOUNTS;
ia->args.opcode = FUSE_INIT; ia->args.opcode = FUSE_INIT;
ia->args.in_numargs = 1; ia->args.in_numargs = 1;
ia->args.in_args[0].size = sizeof(ia->in); ia->args.in_args[0].size = sizeof(ia->in);
...@@ -1018,8 +1120,8 @@ void fuse_send_init(struct fuse_conn *fc) ...@@ -1018,8 +1120,8 @@ void fuse_send_init(struct fuse_conn *fc)
ia->args.nocreds = true; ia->args.nocreds = true;
ia->args.end = process_init_reply; ia->args.end = process_init_reply;
if (fuse_simple_background(fc, &ia->args, GFP_KERNEL) != 0) if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
process_init_reply(fc, &ia->args, -ENOTCONN); process_init_reply(fm, &ia->args, -ENOTCONN);
} }
EXPORT_SYMBOL_GPL(fuse_send_init); EXPORT_SYMBOL_GPL(fuse_send_init);
...@@ -1130,10 +1232,92 @@ void fuse_dev_free(struct fuse_dev *fud) ...@@ -1130,10 +1232,92 @@ void fuse_dev_free(struct fuse_dev *fud)
} }
EXPORT_SYMBOL_GPL(fuse_dev_free); EXPORT_SYMBOL_GPL(fuse_dev_free);
static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
const struct fuse_inode *fi)
{
*attr = (struct fuse_attr){
.ino = fi->inode.i_ino,
.size = fi->inode.i_size,
.blocks = fi->inode.i_blocks,
.atime = fi->inode.i_atime.tv_sec,
.mtime = fi->inode.i_mtime.tv_sec,
.ctime = fi->inode.i_ctime.tv_sec,
.atimensec = fi->inode.i_atime.tv_nsec,
.mtimensec = fi->inode.i_mtime.tv_nsec,
.ctimensec = fi->inode.i_ctime.tv_nsec,
.mode = fi->inode.i_mode,
.nlink = fi->inode.i_nlink,
.uid = fi->inode.i_uid.val,
.gid = fi->inode.i_gid.val,
.rdev = fi->inode.i_rdev,
.blksize = 1u << fi->inode.i_blkbits,
};
}
static void fuse_sb_defaults(struct super_block *sb)
{
sb->s_magic = FUSE_SUPER_MAGIC;
sb->s_op = &fuse_super_operations;
sb->s_xattr = fuse_xattr_handlers;
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_time_gran = 1;
sb->s_export_op = &fuse_export_operations;
sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
if (sb->s_user_ns != &init_user_ns)
sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
/*
* If we are not in the initial user namespace posix
* acls must be translated.
*/
if (sb->s_user_ns != &init_user_ns)
sb->s_xattr = fuse_no_acl_xattr_handlers;
}
int fuse_fill_super_submount(struct super_block *sb,
struct fuse_inode *parent_fi)
{
struct fuse_mount *fm = get_fuse_mount_super(sb);
struct super_block *parent_sb = parent_fi->inode.i_sb;
struct fuse_attr root_attr;
struct inode *root;
fuse_sb_defaults(sb);
fm->sb = sb;
WARN_ON(sb->s_bdi != &noop_backing_dev_info);
sb->s_bdi = bdi_get(parent_sb->s_bdi);
sb->s_xattr = parent_sb->s_xattr;
sb->s_time_gran = parent_sb->s_time_gran;
sb->s_blocksize = parent_sb->s_blocksize;
sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
if (parent_sb->s_subtype && !sb->s_subtype)
return -ENOMEM;
fuse_fill_attr_from_inode(&root_attr, parent_fi);
root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
/*
* This inode is just a duplicate, so it is not looked up and
* its nlookup should not be incremented. fuse_iget() does
* that, though, so undo it here.
*/
get_fuse_inode(root)->nlookup--;
sb->s_d_op = &fuse_dentry_operations;
sb->s_root = d_make_root(root);
if (!sb->s_root)
return -ENOMEM;
return 0;
}
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
{ {
struct fuse_dev *fud = NULL; struct fuse_dev *fud = NULL;
struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_mount *fm = get_fuse_mount_super(sb);
struct fuse_conn *fc = fm->fc;
struct inode *root; struct inode *root;
struct dentry *root_dentry; struct dentry *root_dentry;
int err; int err;
...@@ -1142,7 +1326,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) ...@@ -1142,7 +1326,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
if (sb->s_flags & SB_MANDLOCK) if (sb->s_flags & SB_MANDLOCK)
goto err; goto err;
sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); fuse_sb_defaults(sb);
if (ctx->is_bdev) { if (ctx->is_bdev) {
#ifdef CONFIG_BLOCK #ifdef CONFIG_BLOCK
...@@ -1157,32 +1341,21 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) ...@@ -1157,32 +1341,21 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
sb->s_subtype = ctx->subtype; sb->s_subtype = ctx->subtype;
ctx->subtype = NULL; ctx->subtype = NULL;
sb->s_magic = FUSE_SUPER_MAGIC; if (IS_ENABLED(CONFIG_FUSE_DAX)) {
sb->s_op = &fuse_super_operations; err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
sb->s_xattr = fuse_xattr_handlers; if (err)
sb->s_maxbytes = MAX_LFS_FILESIZE; goto err;
sb->s_time_gran = 1; }
sb->s_export_op = &fuse_export_operations;
sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
if (sb->s_user_ns != &init_user_ns)
sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
/*
* If we are not in the initial user namespace posix
* acls must be translated.
*/
if (sb->s_user_ns != &init_user_ns)
sb->s_xattr = fuse_no_acl_xattr_handlers;
if (ctx->fudptr) { if (ctx->fudptr) {
err = -ENOMEM; err = -ENOMEM;
fud = fuse_dev_alloc_install(fc); fud = fuse_dev_alloc_install(fc);
if (!fud) if (!fud)
goto err; goto err_free_dax;
} }
fc->dev = sb->s_dev; fc->dev = sb->s_dev;
fc->sb = sb; fm->sb = sb;
err = fuse_bdi_init(fc, sb); err = fuse_bdi_init(fc, sb);
if (err) if (err)
goto err_dev_free; goto err_dev_free;
...@@ -1196,11 +1369,11 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) ...@@ -1196,11 +1369,11 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
fc->allow_other = ctx->allow_other; fc->allow_other = ctx->allow_other;
fc->user_id = ctx->user_id; fc->user_id = ctx->user_id;
fc->group_id = ctx->group_id; fc->group_id = ctx->group_id;
fc->max_read = max_t(unsigned, 4096, ctx->max_read); fc->legacy_opts_show = ctx->legacy_opts_show;
fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
fc->destroy = ctx->destroy; fc->destroy = ctx->destroy;
fc->no_control = ctx->no_control; fc->no_control = ctx->no_control;
fc->no_force_umount = ctx->no_force_umount; fc->no_force_umount = ctx->no_force_umount;
fc->no_mount_options = ctx->no_mount_options;
err = -ENOMEM; err = -ENOMEM;
root = fuse_get_root_inode(sb, ctx->rootmode); root = fuse_get_root_inode(sb, ctx->rootmode);
...@@ -1233,6 +1406,9 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) ...@@ -1233,6 +1406,9 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
err_dev_free: err_dev_free:
if (fud) if (fud)
fuse_dev_free(fud); fuse_dev_free(fud);
err_free_dax:
if (IS_ENABLED(CONFIG_FUSE_DAX))
fuse_dax_conn_free(fc);
err: err:
return err; return err;
} }
...@@ -1244,6 +1420,7 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) ...@@ -1244,6 +1420,7 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
struct file *file; struct file *file;
int err; int err;
struct fuse_conn *fc; struct fuse_conn *fc;
struct fuse_mount *fm;
err = -EINVAL; err = -EINVAL;
file = fget(ctx->fd); file = fget(ctx->fd);
...@@ -1264,9 +1441,16 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) ...@@ -1264,9 +1441,16 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
if (!fc) if (!fc)
goto err_fput; goto err_fput;
fuse_conn_init(fc, sb->s_user_ns, &fuse_dev_fiq_ops, NULL); fm = kzalloc(sizeof(*fm), GFP_KERNEL);
if (!fm) {
kfree(fc);
goto err_fput;
}
fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
fc->release = fuse_free_conn; fc->release = fuse_free_conn;
sb->s_fs_info = fc;
sb->s_fs_info = fm;
err = fuse_fill_super_common(sb, ctx); err = fuse_fill_super_common(sb, ctx);
if (err) if (err)
...@@ -1277,11 +1461,11 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) ...@@ -1277,11 +1461,11 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
* CPUs after this * CPUs after this
*/ */
fput(file); fput(file);
fuse_send_init(get_fuse_conn_super(sb)); fuse_send_init(get_fuse_mount_super(sb));
return 0; return 0;
err_put_conn: err_put_conn:
fuse_conn_put(fc); fuse_mount_put(fm);
sb->s_fs_info = NULL; sb->s_fs_info = NULL;
err_fput: err_fput:
fput(file); fput(file);
...@@ -1325,6 +1509,7 @@ static int fuse_init_fs_context(struct fs_context *fc) ...@@ -1325,6 +1509,7 @@ static int fuse_init_fs_context(struct fs_context *fc)
ctx->max_read = ~0; ctx->max_read = ~0;
ctx->blksize = FUSE_DEFAULT_BLKSIZE; ctx->blksize = FUSE_DEFAULT_BLKSIZE;
ctx->legacy_opts_show = true;
#ifdef CONFIG_BLOCK #ifdef CONFIG_BLOCK
if (fc->fs_type == &fuseblk_fs_type) { if (fc->fs_type == &fuseblk_fs_type) {
...@@ -1338,29 +1523,52 @@ static int fuse_init_fs_context(struct fs_context *fc) ...@@ -1338,29 +1523,52 @@ static int fuse_init_fs_context(struct fs_context *fc)
return 0; return 0;
} }
static void fuse_sb_destroy(struct super_block *sb) bool fuse_mount_remove(struct fuse_mount *fm)
{ {
struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_conn *fc = fm->fc;
bool last = false;
down_write(&fc->killsb);
list_del_init(&fm->fc_entry);
if (list_empty(&fc->mounts))
last = true;
up_write(&fc->killsb);
return last;
}
EXPORT_SYMBOL_GPL(fuse_mount_remove);
void fuse_conn_destroy(struct fuse_mount *fm)
{
struct fuse_conn *fc = fm->fc;
if (fc) {
if (fc->destroy) if (fc->destroy)
fuse_send_destroy(fc); fuse_send_destroy(fm);
fuse_abort_conn(fc); fuse_abort_conn(fc);
fuse_wait_aborted(fc); fuse_wait_aborted(fc);
down_write(&fc->killsb); if (!list_empty(&fc->entry)) {
fc->sb = NULL; mutex_lock(&fuse_mutex);
up_write(&fc->killsb); list_del(&fc->entry);
fuse_ctl_remove_conn(fc);
mutex_unlock(&fuse_mutex);
} }
} }
EXPORT_SYMBOL_GPL(fuse_conn_destroy);
void fuse_kill_sb_anon(struct super_block *sb) static void fuse_kill_sb_anon(struct super_block *sb)
{ {
fuse_sb_destroy(sb); struct fuse_mount *fm = get_fuse_mount_super(sb);
bool last;
if (fm) {
last = fuse_mount_remove(fm);
if (last)
fuse_conn_destroy(fm);
}
kill_anon_super(sb); kill_anon_super(sb);
} }
EXPORT_SYMBOL_GPL(fuse_kill_sb_anon);
static struct file_system_type fuse_fs_type = { static struct file_system_type fuse_fs_type = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
...@@ -1375,7 +1583,14 @@ MODULE_ALIAS_FS("fuse"); ...@@ -1375,7 +1583,14 @@ MODULE_ALIAS_FS("fuse");
#ifdef CONFIG_BLOCK #ifdef CONFIG_BLOCK
static void fuse_kill_sb_blk(struct super_block *sb) static void fuse_kill_sb_blk(struct super_block *sb)
{ {
fuse_sb_destroy(sb); struct fuse_mount *fm = get_fuse_mount_super(sb);
bool last;
if (fm) {
last = fuse_mount_remove(fm);
if (last)
fuse_conn_destroy(fm);
}
kill_block_super(sb); kill_block_super(sb);
} }
......
...@@ -252,7 +252,7 @@ static int fuse_direntplus_link(struct file *file, ...@@ -252,7 +252,7 @@ static int fuse_direntplus_link(struct file *file,
static void fuse_force_forget(struct file *file, u64 nodeid) static void fuse_force_forget(struct file *file, u64 nodeid)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_forget_in inarg; struct fuse_forget_in inarg;
FUSE_ARGS(args); FUSE_ARGS(args);
...@@ -266,7 +266,7 @@ static void fuse_force_forget(struct file *file, u64 nodeid) ...@@ -266,7 +266,7 @@ static void fuse_force_forget(struct file *file, u64 nodeid)
args.force = true; args.force = true;
args.noreply = true; args.noreply = true;
fuse_simple_request(fc, &args); fuse_simple_request(fm, &args);
/* ignore errors */ /* ignore errors */
} }
...@@ -320,7 +320,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) ...@@ -320,7 +320,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
ssize_t res; ssize_t res;
struct page *page; struct page *page;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_io_args ia = {}; struct fuse_io_args ia = {};
struct fuse_args_pages *ap = &ia.ap; struct fuse_args_pages *ap = &ia.ap;
struct fuse_page_desc desc = { .length = PAGE_SIZE }; struct fuse_page_desc desc = { .length = PAGE_SIZE };
...@@ -337,7 +337,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) ...@@ -337,7 +337,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
ap->pages = &page; ap->pages = &page;
ap->descs = &desc; ap->descs = &desc;
if (plus) { if (plus) {
attr_version = fuse_get_attr_version(fc); attr_version = fuse_get_attr_version(fm->fc);
fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE, fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
FUSE_READDIRPLUS); FUSE_READDIRPLUS);
} else { } else {
...@@ -345,7 +345,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) ...@@ -345,7 +345,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
FUSE_READDIR); FUSE_READDIR);
} }
locked = fuse_lock_inode(inode); locked = fuse_lock_inode(inode);
res = fuse_simple_request(fc, &ap->args); res = fuse_simple_request(fm, &ap->args);
fuse_unlock_inode(inode, locked); fuse_unlock_inode(inode, locked);
if (res >= 0) { if (res >= 0) {
if (!res) { if (!res) {
......
...@@ -5,12 +5,17 @@ ...@@ -5,12 +5,17 @@
*/ */
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/dax.h>
#include <linux/pci.h>
#include <linux/pfn_t.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/virtio.h> #include <linux/virtio.h>
#include <linux/virtio_fs.h> #include <linux/virtio_fs.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/fs_context.h> #include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/uio.h>
#include "fuse_i.h" #include "fuse_i.h"
/* List of virtio-fs device instances and a lock for the list. Also provides /* List of virtio-fs device instances and a lock for the list. Also provides
...@@ -24,6 +29,8 @@ enum { ...@@ -24,6 +29,8 @@ enum {
VQ_REQUEST VQ_REQUEST
}; };
#define VQ_NAME_LEN 24
/* Per-virtqueue state */ /* Per-virtqueue state */
struct virtio_fs_vq { struct virtio_fs_vq {
spinlock_t lock; spinlock_t lock;
...@@ -36,7 +43,7 @@ struct virtio_fs_vq { ...@@ -36,7 +43,7 @@ struct virtio_fs_vq {
bool connected; bool connected;
long in_flight; long in_flight;
struct completion in_flight_zero; /* No inflight requests */ struct completion in_flight_zero; /* No inflight requests */
char name[24]; char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp;
/* A virtio-fs device instance */ /* A virtio-fs device instance */
...@@ -47,6 +54,12 @@ struct virtio_fs { ...@@ -47,6 +54,12 @@ struct virtio_fs {
struct virtio_fs_vq *vqs; struct virtio_fs_vq *vqs;
unsigned int nvqs; /* number of virtqueues */ unsigned int nvqs; /* number of virtqueues */
unsigned int num_request_queues; /* number of request queues */ unsigned int num_request_queues; /* number of request queues */
struct dax_device *dax_dev;
/* DAX memory window where file contents are mapped */
void *window_kaddr;
phys_addr_t window_phys_addr;
size_t window_len;
}; };
struct virtio_fs_forget_req { struct virtio_fs_forget_req {
...@@ -69,6 +82,44 @@ struct virtio_fs_req_work { ...@@ -69,6 +82,44 @@ struct virtio_fs_req_work {
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
struct fuse_req *req, bool in_flight); struct fuse_req *req, bool in_flight);
enum {
OPT_DAX,
};
static const struct fs_parameter_spec virtio_fs_parameters[] = {
fsparam_flag("dax", OPT_DAX),
{}
};
static int virtio_fs_parse_param(struct fs_context *fc,
struct fs_parameter *param)
{
struct fs_parse_result result;
struct fuse_fs_context *ctx = fc->fs_private;
int opt;
opt = fs_parse(fc, virtio_fs_parameters, param, &result);
if (opt < 0)
return opt;
switch (opt) {
case OPT_DAX:
ctx->dax = 1;
break;
default:
return -EINVAL;
}
return 0;
}
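With this parser, dax becomes a regular virtiofs mount option. Assuming a device exported to the guest with tag myfs (the tag is only an example), a mount that uses the host page cache window looks like:

	mount -t virtiofs -o dax myfs /mnt/virtiofs

Leaving out -o dax keeps the previous behaviour of caching file contents in guest memory.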
static void virtio_fs_free_fc(struct fs_context *fc)
{
struct fuse_fs_context *ctx = fc->fs_private;
kfree(ctx);
}
static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{ {
struct virtio_fs *fs = vq->vdev->priv; struct virtio_fs *fs = vq->vdev->priv;
...@@ -289,7 +340,6 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work) ...@@ -289,7 +340,6 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
struct fuse_req *req; struct fuse_req *req;
struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
dispatch_work.work); dispatch_work.work);
struct fuse_conn *fc = fsvq->fud->fc;
int ret; int ret;
pr_debug("virtio-fs: worker %s called.\n", __func__); pr_debug("virtio-fs: worker %s called.\n", __func__);
...@@ -304,7 +354,7 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work) ...@@ -304,7 +354,7 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
list_del_init(&req->list); list_del_init(&req->list);
spin_unlock(&fsvq->lock); spin_unlock(&fsvq->lock);
fuse_request_end(fc, req); fuse_request_end(req);
} }
/* Dispatch pending requests */ /* Dispatch pending requests */
...@@ -335,7 +385,7 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work) ...@@ -335,7 +385,7 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
spin_unlock(&fsvq->lock); spin_unlock(&fsvq->lock);
pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
ret); ret);
fuse_request_end(fc, req); fuse_request_end(req);
} }
} }
} }
...@@ -495,7 +545,6 @@ static void virtio_fs_request_complete(struct fuse_req *req, ...@@ -495,7 +545,6 @@ static void virtio_fs_request_complete(struct fuse_req *req,
struct virtio_fs_vq *fsvq) struct virtio_fs_vq *fsvq)
{ {
struct fuse_pqueue *fpq = &fsvq->fud->pq; struct fuse_pqueue *fpq = &fsvq->fud->pq;
struct fuse_conn *fc = fsvq->fud->fc;
struct fuse_args *args; struct fuse_args *args;
struct fuse_args_pages *ap; struct fuse_args_pages *ap;
unsigned int len, i, thislen; unsigned int len, i, thislen;
...@@ -528,7 +577,7 @@ static void virtio_fs_request_complete(struct fuse_req *req, ...@@ -528,7 +577,7 @@ static void virtio_fs_request_complete(struct fuse_req *req,
clear_bit(FR_SENT, &req->flags); clear_bit(FR_SENT, &req->flags);
spin_unlock(&fpq->lock); spin_unlock(&fpq->lock);
fuse_request_end(fc, req); fuse_request_end(req);
spin_lock(&fsvq->lock); spin_lock(&fsvq->lock);
dec_in_flight_req(fsvq); dec_in_flight_req(fsvq);
spin_unlock(&fsvq->lock); spin_unlock(&fsvq->lock);
...@@ -596,6 +645,26 @@ static void virtio_fs_vq_done(struct virtqueue *vq) ...@@ -596,6 +645,26 @@ static void virtio_fs_vq_done(struct virtqueue *vq)
schedule_work(&fsvq->done_work); schedule_work(&fsvq->done_work);
} }
static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
int vq_type)
{
strncpy(fsvq->name, name, VQ_NAME_LEN);
spin_lock_init(&fsvq->lock);
INIT_LIST_HEAD(&fsvq->queued_reqs);
INIT_LIST_HEAD(&fsvq->end_reqs);
init_completion(&fsvq->in_flight_zero);
if (vq_type == VQ_REQUEST) {
INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
INIT_DELAYED_WORK(&fsvq->dispatch_work,
virtio_fs_request_dispatch_work);
} else {
INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
INIT_DELAYED_WORK(&fsvq->dispatch_work,
virtio_fs_hiprio_dispatch_work);
}
}
/* Initialize virtqueues */ /* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev, static int virtio_fs_setup_vqs(struct virtio_device *vdev,
struct virtio_fs *fs) struct virtio_fs *fs)
...@@ -611,7 +680,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, ...@@ -611,7 +680,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
if (fs->num_request_queues == 0) if (fs->num_request_queues == 0)
return -EINVAL; return -EINVAL;
fs->nvqs = 1 + fs->num_request_queues; fs->nvqs = VQ_REQUEST + fs->num_request_queues;
fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
if (!fs->vqs) if (!fs->vqs)
return -ENOMEM; return -ENOMEM;
...@@ -625,29 +694,17 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, ...@@ -625,29 +694,17 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
goto out; goto out;
} }
/* Initialize the hiprio/forget request virtqueue */
callbacks[VQ_HIPRIO] = virtio_fs_vq_done; callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name), virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
"hiprio");
names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
virtio_fs_hiprio_dispatch_work);
init_completion(&fs->vqs[VQ_HIPRIO].in_flight_zero);
spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
/* Initialize the requests virtqueues */ /* Initialize the requests virtqueues */
for (i = VQ_REQUEST; i < fs->nvqs; i++) { for (i = VQ_REQUEST; i < fs->nvqs; i++) {
spin_lock_init(&fs->vqs[i].lock); char vq_name[VQ_NAME_LEN];
INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
virtio_fs_request_dispatch_work); virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
init_completion(&fs->vqs[i].in_flight_zero);
snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
"requests.%u", i - VQ_REQUEST);
callbacks[i] = virtio_fs_vq_done; callbacks[i] = virtio_fs_vq_done;
names[i] = fs->vqs[i].name; names[i] = fs->vqs[i].name;
} }
...@@ -676,6 +733,130 @@ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev, ...@@ -676,6 +733,130 @@ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
vdev->config->del_vqs(vdev); vdev->config->del_vqs(vdev);
} }
/* Map a window offset to a page frame number. The window offset will have
* been produced by .iomap_begin(), which maps a file offset to a window
* offset.
*/
static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn)
{
struct virtio_fs *fs = dax_get_private(dax_dev);
phys_addr_t offset = PFN_PHYS(pgoff);
size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff;
if (kaddr)
*kaddr = fs->window_kaddr + offset;
if (pfn)
*pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
PFN_DEV | PFN_MAP);
return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}
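A worked example of the arithmetic above, assuming 4 KiB pages and a 1 GiB cache window (both sizes are illustrative only):

	window_len / PAGE_SIZE = 0x40000 pages
	pgoff = 0x3fffe, nr_pages = 8
	*kaddr = window_kaddr + PFN_PHYS(0x3fffe) = window_kaddr + 0x3fffe000
	max_nr_pages = 0x40000 - 0x3fffe = 2, so the return value is 2, not 8

Callers therefore must not assume the whole request was mapped; the dax core issues further calls for the remainder.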
static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev,
pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i)
{
return copy_from_iter(addr, bytes, i);
}
static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev,
pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i)
{
return copy_to_iter(addr, bytes, i);
}
static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
pgoff_t pgoff, size_t nr_pages)
{
long rc;
void *kaddr;
rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
if (rc < 0)
return rc;
memset(kaddr, 0, nr_pages << PAGE_SHIFT);
dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
return 0;
}
static const struct dax_operations virtio_fs_dax_ops = {
.direct_access = virtio_fs_direct_access,
.copy_from_iter = virtio_fs_copy_from_iter,
.copy_to_iter = virtio_fs_copy_to_iter,
.zero_page_range = virtio_fs_zero_page_range,
};
static void virtio_fs_cleanup_dax(void *data)
{
struct dax_device *dax_dev = data;
kill_dax(dax_dev);
put_dax(dax_dev);
}
static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
{
struct virtio_shm_region cache_reg;
struct dev_pagemap *pgmap;
bool have_cache;
if (!IS_ENABLED(CONFIG_FUSE_DAX))
return 0;
/* Get cache region */
have_cache = virtio_get_shm_region(vdev, &cache_reg,
(u8)VIRTIO_FS_SHMCAP_ID_CACHE);
if (!have_cache) {
dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
return 0;
}
if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
dev_name(&vdev->dev))) {
dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
cache_reg.addr, cache_reg.len);
return -EBUSY;
}
dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
cache_reg.addr);
pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
if (!pgmap)
return -ENOMEM;
pgmap->type = MEMORY_DEVICE_FS_DAX;
/* Ideally we would directly use the PCI BAR resource but
* devm_memremap_pages() wants its own copy in pgmap. So
* initialize a struct range from scratch (only the start
* and end fields will be used).
*/
pgmap->range = (struct range) {
.start = (phys_addr_t) cache_reg.addr,
.end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
};
pgmap->nr_range = 1;
fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
if (IS_ERR(fs->window_kaddr))
return PTR_ERR(fs->window_kaddr);
fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
fs->window_len = (phys_addr_t) cache_reg.len;
dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0);
if (IS_ERR(fs->dax_dev))
return PTR_ERR(fs->dax_dev);
return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
fs->dax_dev);
}
static int virtio_fs_probe(struct virtio_device *vdev) static int virtio_fs_probe(struct virtio_device *vdev)
{ {
struct virtio_fs *fs; struct virtio_fs *fs;
...@@ -697,6 +878,10 @@ static int virtio_fs_probe(struct virtio_device *vdev) ...@@ -697,6 +878,10 @@ static int virtio_fs_probe(struct virtio_device *vdev)
/* TODO vq affinity */ /* TODO vq affinity */
ret = virtio_fs_setup_dax(vdev, fs);
if (ret < 0)
goto out_vqs;
/* Bring the device online in case the filesystem is mounted and /* Bring the device online in case the filesystem is mounted and
* requests need to be sent before we return. * requests need to be sent before we return.
*/ */
...@@ -833,18 +1018,37 @@ __releases(fiq->lock) ...@@ -833,18 +1018,37 @@ __releases(fiq->lock)
spin_unlock(&fiq->lock); spin_unlock(&fiq->lock);
} }
/* Count number of scatter-gather elements required */
static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
unsigned int num_pages,
unsigned int total_len)
{
unsigned int i;
unsigned int this_len;
for (i = 0; i < num_pages && total_len; i++) {
this_len = min(page_descs[i].length, total_len);
total_len -= this_len;
}
return i;
}
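A worked example of why this count can be smaller than ap->num_pages (numbers are illustrative): three page descriptors of 4096 bytes each, but a final argument of only 6000 bytes:

	i = 0: this_len = min(4096, 6000) = 4096, total_len -> 1904
	i = 1: this_len = min(4096, 1904) = 1904, total_len -> 0
	loop exits with i == 2, so two scatter-gather elements are counted instead of three

This is what lets the enqueue path size its scatter-gather array to what will actually be transferred.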
/* Return the number of scatter-gather list elements required */ /* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req) static unsigned int sg_count_fuse_req(struct fuse_req *req)
{ {
struct fuse_args *args = req->args; struct fuse_args *args = req->args;
struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
unsigned int total_sgs = 1 /* fuse_in_header */; unsigned int size, total_sgs = 1 /* fuse_in_header */;
if (args->in_numargs - args->in_pages) if (args->in_numargs - args->in_pages)
total_sgs += 1; total_sgs += 1;
if (args->in_pages) if (args->in_pages) {
total_sgs += ap->num_pages; size = args->in_args[args->in_numargs - 1].size;
total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
size);
}
if (!test_bit(FR_ISREPLY, &req->flags)) if (!test_bit(FR_ISREPLY, &req->flags))
return total_sgs; return total_sgs;
...@@ -854,8 +1058,11 @@ static unsigned int sg_count_fuse_req(struct fuse_req *req) ...@@ -854,8 +1058,11 @@ static unsigned int sg_count_fuse_req(struct fuse_req *req)
if (args->out_numargs - args->out_pages) if (args->out_numargs - args->out_pages)
total_sgs += 1; total_sgs += 1;
if (args->out_pages) if (args->out_pages) {
total_sgs += ap->num_pages; size = args->out_args[args->out_numargs - 1].size;
total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
size);
}
return total_sgs; return total_sgs;
} }
...@@ -1071,24 +1278,28 @@ static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { ...@@ -1071,24 +1278,28 @@ static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
.release = virtio_fs_fiq_release, .release = virtio_fs_fiq_release,
}; };
static int virtio_fs_fill_super(struct super_block *sb) static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
{
ctx->rootmode = S_IFDIR;
ctx->default_permissions = 1;
ctx->allow_other = 1;
ctx->max_read = UINT_MAX;
ctx->blksize = 512;
ctx->destroy = true;
ctx->no_control = true;
ctx->no_force_umount = true;
}
static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
{ {
struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_mount *fm = get_fuse_mount_super(sb);
struct fuse_conn *fc = fm->fc;
struct virtio_fs *fs = fc->iq.priv; struct virtio_fs *fs = fc->iq.priv;
struct fuse_fs_context *ctx = fsc->fs_private;
unsigned int i; unsigned int i;
int err; int err;
struct fuse_fs_context ctx = {
.rootmode = S_IFDIR,
.default_permissions = 1,
.allow_other = 1,
.max_read = UINT_MAX,
.blksize = 512,
.destroy = true,
.no_control = true,
.no_force_umount = true,
.no_mount_options = true,
};
virtio_fs_ctx_set_defaults(ctx);
mutex_lock(&virtio_fs_mutex); mutex_lock(&virtio_fs_mutex);
/* After holding mutex, make sure virtiofs device is still there. /* After holding mutex, make sure virtiofs device is still there.
...@@ -1112,8 +1323,10 @@ static int virtio_fs_fill_super(struct super_block *sb) ...@@ -1112,8 +1323,10 @@ static int virtio_fs_fill_super(struct super_block *sb)
} }
/* virtiofs allocates and installs its own fuse devices */ /* virtiofs allocates and installs its own fuse devices */
ctx.fudptr = NULL; ctx->fudptr = NULL;
err = fuse_fill_super_common(sb, &ctx); if (ctx->dax)
ctx->dax_dev = fs->dax_dev;
err = fuse_fill_super_common(sb, ctx);
if (err < 0) if (err < 0)
goto err_free_fuse_devs; goto err_free_fuse_devs;
...@@ -1125,7 +1338,7 @@ static int virtio_fs_fill_super(struct super_block *sb) ...@@ -1125,7 +1338,7 @@ static int virtio_fs_fill_super(struct super_block *sb)
/* Previous unmount will stop all queues. Start these again */ /* Previous unmount will stop all queues. Start these again */
virtio_fs_start_all_queues(fs); virtio_fs_start_all_queues(fs);
fuse_send_init(fc); fuse_send_init(fm);
mutex_unlock(&virtio_fs_mutex); mutex_unlock(&virtio_fs_mutex);
return 0; return 0;
...@@ -1136,18 +1349,17 @@ static int virtio_fs_fill_super(struct super_block *sb) ...@@ -1136,18 +1349,17 @@ static int virtio_fs_fill_super(struct super_block *sb)
return err; return err;
} }
static void virtio_kill_sb(struct super_block *sb) static void virtio_fs_conn_destroy(struct fuse_mount *fm)
{ {
struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_conn *fc = fm->fc;
struct virtio_fs *vfs; struct virtio_fs *vfs = fc->iq.priv;
struct virtio_fs_vq *fsvq; struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];
/* If mount failed, we can still be called without any fc */ /* Stop dax worker. Soon evict_inodes() will be called which
if (!fc) * will free all memory ranges belonging to all inodes.
return fuse_kill_sb_anon(sb); */
if (IS_ENABLED(CONFIG_FUSE_DAX))
vfs = fc->iq.priv; fuse_dax_cancel_work(fc);
fsvq = &vfs->vqs[VQ_HIPRIO];
/* Stop forget queue. Soon destroy will be sent */ /* Stop forget queue. Soon destroy will be sent */
spin_lock(&fsvq->lock); spin_lock(&fsvq->lock);
...@@ -1155,9 +1367,9 @@ static void virtio_kill_sb(struct super_block *sb) ...@@ -1155,9 +1367,9 @@ static void virtio_kill_sb(struct super_block *sb)
spin_unlock(&fsvq->lock); spin_unlock(&fsvq->lock);
virtio_fs_drain_all_queues(vfs); virtio_fs_drain_all_queues(vfs);
fuse_kill_sb_anon(sb); fuse_conn_destroy(fm);
/* fuse_kill_sb_anon() must have sent destroy. Stop all queues /* fuse_conn_destroy() must have sent destroy. Stop all queues
* and drain one more time and free fuse devices. Freeing fuse * and drain one more time and free fuse devices. Freeing fuse
* devices will drop their reference on fuse_conn and that in * devices will drop their reference on fuse_conn and that in
* turn will drop its reference on virtio_fs object. * turn will drop its reference on virtio_fs object.
...@@ -1167,12 +1379,27 @@ static void virtio_kill_sb(struct super_block *sb) ...@@ -1167,12 +1379,27 @@ static void virtio_kill_sb(struct super_block *sb)
virtio_fs_free_devs(vfs); virtio_fs_free_devs(vfs);
} }
static void virtio_kill_sb(struct super_block *sb)
{
struct fuse_mount *fm = get_fuse_mount_super(sb);
bool last;
/* If mount failed, we can still be called without any fc */
if (fm) {
last = fuse_mount_remove(fm);
if (last)
virtio_fs_conn_destroy(fm);
}
kill_anon_super(sb);
}
static int virtio_fs_test_super(struct super_block *sb, static int virtio_fs_test_super(struct super_block *sb,
struct fs_context *fsc) struct fs_context *fsc)
{ {
struct fuse_conn *fc = fsc->s_fs_info; struct fuse_mount *fsc_fm = fsc->s_fs_info;
struct fuse_mount *sb_fm = get_fuse_mount_super(sb);
return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv; return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
} }
static int virtio_fs_set_super(struct super_block *sb, static int virtio_fs_set_super(struct super_block *sb,
...@@ -1182,7 +1409,7 @@ static int virtio_fs_set_super(struct super_block *sb, ...@@ -1182,7 +1409,7 @@ static int virtio_fs_set_super(struct super_block *sb,
err = get_anon_bdev(&sb->s_dev); err = get_anon_bdev(&sb->s_dev);
if (!err) if (!err)
fuse_conn_get(fsc->s_fs_info); fuse_mount_get(fsc->s_fs_info);
return err; return err;
} }
...@@ -1192,6 +1419,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc) ...@@ -1192,6 +1419,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
struct virtio_fs *fs; struct virtio_fs *fs;
struct super_block *sb; struct super_block *sb;
struct fuse_conn *fc; struct fuse_conn *fc;
struct fuse_mount *fm;
int err; int err;
/* This gets a reference on virtio_fs object. This ptr gets installed /* This gets a reference on virtio_fs object. This ptr gets installed
...@@ -1212,19 +1440,29 @@ static int virtio_fs_get_tree(struct fs_context *fsc) ...@@ -1212,19 +1440,29 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
return -ENOMEM; return -ENOMEM;
} }
fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops, fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
fs); if (!fm) {
mutex_lock(&virtio_fs_mutex);
virtio_fs_put(fs);
mutex_unlock(&virtio_fs_mutex);
kfree(fc);
return -ENOMEM;
}
fuse_conn_init(fc, fm, get_user_ns(current_user_ns()),
&virtio_fs_fiq_ops, fs);
fc->release = fuse_free_conn; fc->release = fuse_free_conn;
fc->delete_stale = true; fc->delete_stale = true;
fc->auto_submounts = true;
fsc->s_fs_info = fc; fsc->s_fs_info = fm;
sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super); sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
fuse_conn_put(fc); fuse_mount_put(fm);
if (IS_ERR(sb)) if (IS_ERR(sb))
return PTR_ERR(sb); return PTR_ERR(sb);
if (!sb->s_root) { if (!sb->s_root) {
err = virtio_fs_fill_super(sb); err = virtio_fs_fill_super(sb, fsc);
if (err) { if (err) {
deactivate_locked_super(sb); deactivate_locked_super(sb);
return err; return err;
...@@ -1239,11 +1477,19 @@ static int virtio_fs_get_tree(struct fs_context *fsc) ...@@ -1239,11 +1477,19 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
} }
static const struct fs_context_operations virtio_fs_context_ops = { static const struct fs_context_operations virtio_fs_context_ops = {
.free = virtio_fs_free_fc,
.parse_param = virtio_fs_parse_param,
.get_tree = virtio_fs_get_tree, .get_tree = virtio_fs_get_tree,
}; };
static int virtio_fs_init_fs_context(struct fs_context *fsc) static int virtio_fs_init_fs_context(struct fs_context *fsc)
{ {
struct fuse_fs_context *ctx;
ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
fsc->fs_private = ctx;
fsc->ops = &virtio_fs_context_ops; fsc->ops = &virtio_fs_context_ops;
return 0; return 0;
} }
......
...@@ -14,12 +14,12 @@ ...@@ -14,12 +14,12 @@
int fuse_setxattr(struct inode *inode, const char *name, const void *value, int fuse_setxattr(struct inode *inode, const char *name, const void *value,
size_t size, int flags) size_t size, int flags)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_setxattr_in inarg; struct fuse_setxattr_in inarg;
int err; int err;
if (fc->no_setxattr) if (fm->fc->no_setxattr)
return -EOPNOTSUPP; return -EOPNOTSUPP;
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
...@@ -34,9 +34,9 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value, ...@@ -34,9 +34,9 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value,
args.in_args[1].value = name; args.in_args[1].value = name;
args.in_args[2].size = size; args.in_args[2].size = size;
args.in_args[2].value = value; args.in_args[2].value = value;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) { if (err == -ENOSYS) {
fc->no_setxattr = 1; fm->fc->no_setxattr = 1;
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
} }
if (!err) { if (!err) {
...@@ -49,13 +49,13 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value, ...@@ -49,13 +49,13 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value,
ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
size_t size) size_t size)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_getxattr_in inarg; struct fuse_getxattr_in inarg;
struct fuse_getxattr_out outarg; struct fuse_getxattr_out outarg;
ssize_t ret; ssize_t ret;
if (fc->no_getxattr) if (fm->fc->no_getxattr)
return -EOPNOTSUPP; return -EOPNOTSUPP;
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
...@@ -77,11 +77,11 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, ...@@ -77,11 +77,11 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
args.out_args[0].size = sizeof(outarg); args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg; args.out_args[0].value = &outarg;
} }
ret = fuse_simple_request(fc, &args); ret = fuse_simple_request(fm, &args);
if (!ret && !size) if (!ret && !size)
ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX); ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX);
if (ret == -ENOSYS) { if (ret == -ENOSYS) {
fc->no_getxattr = 1; fm->fc->no_getxattr = 1;
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
} }
return ret; return ret;
...@@ -107,16 +107,16 @@ static int fuse_verify_xattr_list(char *list, size_t size) ...@@ -107,16 +107,16 @@ static int fuse_verify_xattr_list(char *list, size_t size)
ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
{ {
struct inode *inode = d_inode(entry); struct inode *inode = d_inode(entry);
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
struct fuse_getxattr_in inarg; struct fuse_getxattr_in inarg;
struct fuse_getxattr_out outarg; struct fuse_getxattr_out outarg;
ssize_t ret; ssize_t ret;
if (!fuse_allow_current_process(fc)) if (!fuse_allow_current_process(fm->fc))
return -EACCES; return -EACCES;
if (fc->no_listxattr) if (fm->fc->no_listxattr)
return -EOPNOTSUPP; return -EOPNOTSUPP;
memset(&inarg, 0, sizeof(inarg)); memset(&inarg, 0, sizeof(inarg));
...@@ -136,13 +136,13 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) ...@@ -136,13 +136,13 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
args.out_args[0].size = sizeof(outarg); args.out_args[0].size = sizeof(outarg);
args.out_args[0].value = &outarg; args.out_args[0].value = &outarg;
} }
ret = fuse_simple_request(fc, &args); ret = fuse_simple_request(fm, &args);
if (!ret && !size) if (!ret && !size)
ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX); ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX);
if (ret > 0 && size) if (ret > 0 && size)
ret = fuse_verify_xattr_list(list, ret); ret = fuse_verify_xattr_list(list, ret);
if (ret == -ENOSYS) { if (ret == -ENOSYS) {
fc->no_listxattr = 1; fm->fc->no_listxattr = 1;
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
} }
return ret; return ret;
...@@ -150,11 +150,11 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) ...@@ -150,11 +150,11 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
int fuse_removexattr(struct inode *inode, const char *name) int fuse_removexattr(struct inode *inode, const char *name)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args); FUSE_ARGS(args);
int err; int err;
if (fc->no_removexattr) if (fm->fc->no_removexattr)
return -EOPNOTSUPP; return -EOPNOTSUPP;
args.opcode = FUSE_REMOVEXATTR; args.opcode = FUSE_REMOVEXATTR;
...@@ -162,9 +162,9 @@ int fuse_removexattr(struct inode *inode, const char *name) ...@@ -162,9 +162,9 @@ int fuse_removexattr(struct inode *inode, const char *name)
args.in_numargs = 1; args.in_numargs = 1;
args.in_args[0].size = strlen(name) + 1; args.in_args[0].size = strlen(name) + 1;
args.in_args[0].value = name; args.in_args[0].value = name;
err = fuse_simple_request(fc, &args); err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) { if (err == -ENOSYS) {
fc->no_removexattr = 1; fm->fc->no_removexattr = 1;
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
} }
if (!err) { if (!err) {
......
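The xattr.c hunks above are part of the mechanical conversion from the "fuse: split fuse_mount off of fuse_conn" patch in this series: requests are now issued against a struct fuse_mount, while connection-wide state such as the no_getxattr/no_listxattr/no_removexattr flags stays on fm->fc. A minimal sketch of the converted pattern follows; fuse_send_simple_op and FUSE_HYPOTHETICAL_OP are illustrative names, not code from this commit.

	#include "fuse_i.h"	/* FUSE_ARGS, get_fuse_mount(), fuse_simple_request() */

	/*
	 * Sketch only: issue a one-argument request through the new
	 * fuse_mount handle instead of the old fuse_conn handle.
	 */
	static int fuse_send_simple_op(struct inode *inode, const char *name)
	{
		struct fuse_mount *fm = get_fuse_mount(inode);
		FUSE_ARGS(args);

		args.opcode = FUSE_HYPOTHETICAL_OP;	/* placeholder opcode */
		args.nodeid = get_node_id(inode);
		args.in_numargs = 1;
		args.in_args[0].size = strlen(name) + 1;
		args.in_args[0].value = name;

		/* was fuse_simple_request(fc, &args) before the split */
		return fuse_simple_request(fm, &args);
	}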
...@@ -149,6 +149,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, ...@@ -149,6 +149,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
struct dax_device *dax_dev, struct writeback_control *wbc); struct dax_device *dax_dev, struct writeback_control *wbc);
struct page *dax_layout_busy_page(struct address_space *mapping); struct page *dax_layout_busy_page(struct address_space *mapping);
struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);

dax_entry_t dax_lock_page(struct page *page); dax_entry_t dax_lock_page(struct page *page);
void dax_unlock_page(struct page *page, dax_entry_t cookie); void dax_unlock_page(struct page *page, dax_entry_t cookie);
#else #else
...@@ -179,6 +180,11 @@ static inline struct page *dax_layout_busy_page(struct address_space *mapping) ...@@ -179,6 +180,11 @@ static inline struct page *dax_layout_busy_page(struct address_space *mapping)
return NULL; return NULL;
} }
static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end)
{
return NULL;
}
static inline int dax_writeback_mapping_range(struct address_space *mapping, static inline int dax_writeback_mapping_range(struct address_space *mapping,
struct dax_device *dax_dev, struct writeback_control *wbc) struct dax_device *dax_dev, struct writeback_control *wbc)
{ {
......
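dax_layout_busy_page_range() is the range-limited counterpart of dax_layout_busy_page(): it reports a DAX page within [start, end] whose refcount is still elevated, so a filesystem can wait for such pages before truncating or punching a hole in that range (virtiofs uses it to serialize truncate/punch_hole against the DAX fault path). A simplified sketch of a caller is below; the helper name is hypothetical and the wait logic is reduced to a retry loop, whereas the real code waits for the page refcount to drop.

	#include <linux/dax.h>
	#include <linux/sched.h>
	#include <linux/sched/signal.h>

	/* Sketch only: back off until no DAX page in the range is busy. */
	static int my_fs_break_dax_layouts(struct inode *inode, loff_t start, loff_t end)
	{
		struct page *page;

		while ((page = dax_layout_busy_page_range(inode->i_mapping, start, end))) {
			/* A page in [start, end] is still pinned (e.g. in-flight DMA). */
			if (fatal_signal_pending(current))
				return -EINTR;
			cond_resched();
		}
		return 0;
	}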
...@@ -172,6 +172,9 @@ ...@@ -172,6 +172,9 @@
* - add FUSE_WRITE_KILL_PRIV flag * - add FUSE_WRITE_KILL_PRIV flag
* - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
* - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
*
* 7.32
* - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS
*/ */
#ifndef _LINUX_FUSE_H #ifndef _LINUX_FUSE_H
...@@ -207,7 +210,7 @@ ...@@ -207,7 +210,7 @@
#define FUSE_KERNEL_VERSION 7 #define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */ /** Minor version number of this interface */
#define FUSE_KERNEL_MINOR_VERSION 31 #define FUSE_KERNEL_MINOR_VERSION 32
/** The node ID of the root inode */ /** The node ID of the root inode */
#define FUSE_ROOT_ID 1 #define FUSE_ROOT_ID 1
...@@ -231,7 +234,7 @@ struct fuse_attr { ...@@ -231,7 +234,7 @@ struct fuse_attr {
uint32_t gid; uint32_t gid;
uint32_t rdev; uint32_t rdev;
uint32_t blksize; uint32_t blksize;
uint32_t padding; uint32_t flags;
}; };
struct fuse_kstatfs { struct fuse_kstatfs {
...@@ -313,7 +316,10 @@ struct fuse_file_lock { ...@@ -313,7 +316,10 @@ struct fuse_file_lock {
* FUSE_CACHE_SYMLINKS: cache READLINK responses * FUSE_CACHE_SYMLINKS: cache READLINK responses
* FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
* FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
* FUSE_MAP_ALIGNMENT: map_alignment field is valid * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for
* foffset and moffset fields in struct
* fuse_setupmapping_out and fuse_removemapping_one.
* FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts
*/ */
#define FUSE_ASYNC_READ (1 << 0) #define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1) #define FUSE_POSIX_LOCKS (1 << 1)
...@@ -342,6 +348,7 @@ struct fuse_file_lock { ...@@ -342,6 +348,7 @@ struct fuse_file_lock {
#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) #define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
#define FUSE_EXPLICIT_INVAL_DATA (1 << 25) #define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
#define FUSE_MAP_ALIGNMENT (1 << 26) #define FUSE_MAP_ALIGNMENT (1 << 26)
#define FUSE_SUBMOUNTS (1 << 27)
/** /**
* CUSE INIT request/reply flags * CUSE INIT request/reply flags
...@@ -417,6 +424,13 @@ struct fuse_file_lock { ...@@ -417,6 +424,13 @@ struct fuse_file_lock {
*/ */
#define FUSE_FSYNC_FDATASYNC (1 << 0) #define FUSE_FSYNC_FDATASYNC (1 << 0)
/**
* fuse_attr flags
*
* FUSE_ATTR_SUBMOUNT: Object is a submount root
*/
#define FUSE_ATTR_SUBMOUNT (1 << 0)
enum fuse_opcode { enum fuse_opcode {
FUSE_LOOKUP = 1, FUSE_LOOKUP = 1,
FUSE_FORGET = 2, /* no reply */ FUSE_FORGET = 2, /* no reply */
...@@ -892,4 +906,34 @@ struct fuse_copy_file_range_in { ...@@ -892,4 +906,34 @@ struct fuse_copy_file_range_in {
uint64_t flags; uint64_t flags;
}; };
#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1)
struct fuse_setupmapping_in {
/* An already open handle */
uint64_t fh;
/* Offset into the file to start the mapping */
uint64_t foffset;
/* Length of mapping required */
uint64_t len;
/* Flags, FUSE_SETUPMAPPING_FLAG_* */
uint64_t flags;
/* Offset in Memory Window */
uint64_t moffset;
};
struct fuse_removemapping_in {
	/* number of fuse_removemapping_one entries that follow */
uint32_t count;
};
struct fuse_removemapping_one {
	/* Offset into the dax window at which to start the unmapping */
uint64_t moffset;
/* Length of mapping required */
uint64_t len;
};
#define FUSE_REMOVEMAPPING_MAX_ENTRY \
(PAGE_SIZE / sizeof(struct fuse_removemapping_one))
#endif /* _LINUX_FUSE_H */ #endif /* _LINUX_FUSE_H */
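The protocol additions above bump the minor version to 7.32, repurpose fuse_attr.padding as fuse_attr.flags so a server can mark a directory as a submount root with FUSE_ATTR_SUBMOUNT once FUSE_SUBMOUNTS is negotiated, and introduce the FUSE_SETUPMAPPING/FUSE_REMOVEMAPPING messages used by virtiofs DAX. A hedged sketch of filling a setupmapping request follows; fill_setupmapping is a hypothetical helper, and which flag bits a given client sets is up to that client, not dictated by this header.

	#include <stdbool.h>
	#include <stdint.h>
	#include <string.h>
	#include <linux/fuse.h>

	/*
	 * Sketch only: map @len bytes of the file (handle @fh) starting at
	 * @foffset into the DAX window at @moffset.
	 */
	static void fill_setupmapping(struct fuse_setupmapping_in *inarg,
				      uint64_t fh, uint64_t foffset,
				      uint64_t len, uint64_t moffset,
				      bool writable)
	{
		memset(inarg, 0, sizeof(*inarg));
		inarg->fh = fh;
		inarg->foffset = foffset;
		inarg->len = len;
		inarg->moffset = moffset;
		inarg->flags = FUSE_SETUPMAPPING_FLAG_READ;
		if (writable)
			inarg->flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
	}

Per the comment block above, a server that sets FUSE_MAP_ALIGNMENT reports log2 of the required byte alignment in init_out.map_alignment, and foffset/moffset are expected to respect it; removals are batched, with at most FUSE_REMOVEMAPPING_MAX_ENTRY fuse_removemapping_one entries per FUSE_REMOVEMAPPING message.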
...@@ -16,4 +16,7 @@ struct virtio_fs_config { ...@@ -16,4 +16,7 @@ struct virtio_fs_config {
__le32 num_request_queues; __le32 num_request_queues;
} __attribute__((packed)); } __attribute__((packed));
/* For the id field in virtio_pci_shm_cap */
#define VIRTIO_FS_SHMCAP_ID_CACHE 0
#endif /* _UAPI_LINUX_VIRTIO_FS_H */ #endif /* _UAPI_LINUX_VIRTIO_FS_H */
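VIRTIO_FS_SHMCAP_ID_CACHE identifies the virtio shared memory region that backs the DAX window on the device. A sketch of how a guest driver could locate it is below, assuming the virtio core's virtio_get_shm_region() helper; find_dax_window is an illustrative name and error handling is elided.

	#include <linux/virtio_config.h>
	#include <uapi/linux/virtio_fs.h>

	/* Sketch only: look up the DAX cache region exposed by the device. */
	static int find_dax_window(struct virtio_device *vdev,
				   struct virtio_shm_region *cache_reg)
	{
		if (!virtio_get_shm_region(vdev, cache_reg,
					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE))
			return -ENODEV;	/* device exposes no DAX cache region */

		/* cache_reg->addr and cache_reg->len now describe the window */
		return 0;
	}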