Commit f153fbe1 authored by Linus Torvalds

Merge tag 'erofs-for-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "In this cycle, we introduce compressed inode support over fscache
  since a lot of native EROFS images are explicitly compressed so that
  EROFS over fscache can be more widely used even without Dragonfly
  Nydus [1].

  Apart from that, there are some folio conversions for compressed
  inodes available as well as a lockdep false positive fix.

  Summary:

   - Some folio conversions for compressed inodes;

   - Add compressed inode support over fscache;

   - Fix lockdep false positives of erofs_pseudo_mnt"

Link: https://nydus.dev [1]

* tag 'erofs-for-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: support compressed inodes over fscache
  erofs: make iov_iter describe target buffers over fscache
  erofs: fix lockdep false positives on initializing erofs_pseudo_mnt
  erofs: refine managed cache operations to folios
  erofs: convert z_erofs_submissionqueue_endio() to folios
  erofs: convert z_erofs_fill_bio_vec() to folios
  erofs: get rid of `justfound` debugging tag
  erofs: convert z_erofs_do_read_page() to folios
  erofs: convert z_erofs_onlinepage_.* to folios
parents d453cc5a a1bafc31
...@@ -81,13 +81,6 @@ static inline bool z_erofs_put_shortlivedpage(struct page **pagepool, ...@@ -81,13 +81,6 @@ static inline bool z_erofs_put_shortlivedpage(struct page **pagepool,
return true; return true;
} }
#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
struct page *page)
{
return page->mapping == MNGD_MAPPING(sbi);
}
int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize); unsigned int padbufsize);
extern const struct z_erofs_decompressor erofs_decompressors[]; extern const struct z_erofs_decompressor erofs_decompressors[];
......
...@@ -212,9 +212,6 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, ...@@ -212,9 +212,6 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
if (rq->out[no] != rq->in[j]) if (rq->out[no] != rq->in[j])
continue; continue;
DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
rq->in[j]));
tmppage = erofs_allocpage(pgpl, rq->gfp); tmppage = erofs_allocpage(pgpl, rq->gfp);
if (!tmppage) { if (!tmppage) {
err = -ENOMEM; err = -ENOMEM;
......
...@@ -258,9 +258,6 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, ...@@ -258,9 +258,6 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
if (rq->out[no] != rq->in[j]) if (rq->out[no] != rq->in[j])
continue; continue;
DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
rq->in[j]));
tmppage = erofs_allocpage(pgpl, rq->gfp); tmppage = erofs_allocpage(pgpl, rq->gfp);
if (!tmppage) { if (!tmppage) {
err = -ENOMEM; err = -ENOMEM;
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
* Copyright (C) 2022, Alibaba Cloud * Copyright (C) 2022, Alibaba Cloud
* Copyright (C) 2022, Bytedance Inc. All rights reserved. * Copyright (C) 2022, Bytedance Inc. All rights reserved.
*/ */
#include <linux/pseudo_fs.h>
#include <linux/fscache.h> #include <linux/fscache.h>
#include "internal.h" #include "internal.h"
...@@ -12,9 +13,27 @@ static LIST_HEAD(erofs_domain_list); ...@@ -12,9 +13,27 @@ static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list); static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt; static struct vfsmount *erofs_pseudo_mnt;
struct erofs_fscache_request { static int erofs_anon_init_fs_context(struct fs_context *fc)
struct erofs_fscache_request *primary; {
struct netfs_cache_resources cache_resources; return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type erofs_anon_fs_type = {
.owner = THIS_MODULE,
.name = "pseudo_erofs",
.init_fs_context = erofs_anon_init_fs_context,
.kill_sb = kill_anon_super,
};
struct erofs_fscache_io {
struct netfs_cache_resources cres;
struct iov_iter iter;
netfs_io_terminated_t end_io;
void *private;
refcount_t ref;
};
struct erofs_fscache_rq {
struct address_space *mapping; /* The mapping being accessed */ struct address_space *mapping; /* The mapping being accessed */
loff_t start; /* Start position */ loff_t start; /* Start position */
size_t len; /* Length of the request */ size_t len; /* Length of the request */
...@@ -23,44 +42,17 @@ struct erofs_fscache_request { ...@@ -23,44 +42,17 @@ struct erofs_fscache_request {
refcount_t ref; refcount_t ref;
}; };
static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping, static bool erofs_fscache_io_put(struct erofs_fscache_io *io)
loff_t start, size_t len)
{
struct erofs_fscache_request *req;
req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
if (!req)
return ERR_PTR(-ENOMEM);
req->mapping = mapping;
req->start = start;
req->len = len;
refcount_set(&req->ref, 1);
return req;
}
static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
size_t len)
{ {
struct erofs_fscache_request *req; if (!refcount_dec_and_test(&io->ref))
return false;
/* use primary request for the first submission */ if (io->cres.ops)
if (!primary->submitted) { io->cres.ops->end_operation(&io->cres);
refcount_inc(&primary->ref); kfree(io);
return primary; return true;
}
req = erofs_fscache_req_alloc(primary->mapping,
primary->start + primary->submitted, len);
if (!IS_ERR(req)) {
req->primary = primary;
refcount_inc(&primary->ref);
}
return req;
} }
static void erofs_fscache_req_complete(struct erofs_fscache_request *req) static void erofs_fscache_req_complete(struct erofs_fscache_rq *req)
{ {
struct folio *folio; struct folio *folio;
bool failed = req->error; bool failed = req->error;
...@@ -80,120 +72,196 @@ static void erofs_fscache_req_complete(struct erofs_fscache_request *req) ...@@ -80,120 +72,196 @@ static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
rcu_read_unlock(); rcu_read_unlock();
} }
static void erofs_fscache_req_put(struct erofs_fscache_request *req) static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
{ {
if (refcount_dec_and_test(&req->ref)) { if (!refcount_dec_and_test(&req->ref))
if (req->cache_resources.ops) return;
req->cache_resources.ops->end_operation(&req->cache_resources);
if (!req->primary)
erofs_fscache_req_complete(req); erofs_fscache_req_complete(req);
else
erofs_fscache_req_put(req->primary);
kfree(req); kfree(req);
}
} }
static void erofs_fscache_subreq_complete(void *priv, static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping,
loff_t start, size_t len)
{
struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
return NULL;
req->mapping = mapping;
req->start = start;
req->len = len;
refcount_set(&req->ref, 1);
return req;
}
static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
{
struct erofs_fscache_rq *req = io->private;
if (erofs_fscache_io_put(io))
erofs_fscache_req_put(req);
}
static void erofs_fscache_req_end_io(void *priv,
ssize_t transferred_or_error, bool was_async) ssize_t transferred_or_error, bool was_async)
{ {
struct erofs_fscache_request *req = priv; struct erofs_fscache_io *io = priv;
struct erofs_fscache_rq *req = io->private;
if (IS_ERR_VALUE(transferred_or_error)) { if (IS_ERR_VALUE(transferred_or_error))
if (req->primary)
req->primary->error = transferred_or_error;
else
req->error = transferred_or_error; req->error = transferred_or_error;
} erofs_fscache_req_io_put(io);
erofs_fscache_req_put(req); }
static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req)
{
struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL);
if (!io)
return NULL;
io->end_io = erofs_fscache_req_end_io;
io->private = req;
refcount_inc(&req->ref);
refcount_set(&io->ref, 1);
return io;
} }
/* /*
* Read data from fscache (cookie, pstart, len), and fill the read data into * Read data from fscache described by cookie at pstart physical address
* page cache described by (req->mapping, lstart, len). @pstart describes the * offset, and fill the read data into buffer described by io->iter.
* start physical address in the cache file.
*/ */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie, static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
struct erofs_fscache_request *req, loff_t pstart, size_t len) loff_t pstart, struct erofs_fscache_io *io)
{ {
enum netfs_io_source source; enum netfs_io_source source;
struct super_block *sb = req->mapping->host->i_sb; struct netfs_cache_resources *cres = &io->cres;
struct netfs_cache_resources *cres = &req->cache_resources; struct iov_iter *iter = &io->iter;
struct iov_iter iter;
loff_t lstart = req->start + req->submitted;
size_t done = 0;
int ret; int ret;
DBG_BUGON(len > req->len - req->submitted);
ret = fscache_begin_read_operation(cres, cookie); ret = fscache_begin_read_operation(cres, cookie);
if (ret) if (ret)
return ret; return ret;
while (done < len) { while (iov_iter_count(iter)) {
loff_t sstart = pstart + done; size_t orig_count = iov_iter_count(iter), len = orig_count;
size_t slen = len - done;
unsigned long flags = 1 << NETFS_SREQ_ONDEMAND; unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;
source = cres->ops->prepare_ondemand_read(cres, source = cres->ops->prepare_ondemand_read(cres,
sstart, &slen, LLONG_MAX, &flags, 0); pstart, &len, LLONG_MAX, &flags, 0);
if (WARN_ON(slen == 0)) if (WARN_ON(len == 0))
source = NETFS_INVALID_READ; source = NETFS_INVALID_READ;
if (source != NETFS_READ_FROM_CACHE) { if (source != NETFS_READ_FROM_CACHE) {
erofs_err(sb, "failed to fscache prepare_read (source %d)", source); erofs_err(NULL, "prepare_read failed (source %d)", source);
return -EIO; return -EIO;
} }
refcount_inc(&req->ref); iov_iter_truncate(iter, len);
iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages, refcount_inc(&io->ref);
lstart + done, slen); ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL,
io->end_io, io);
ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
erofs_fscache_subreq_complete, req);
if (ret == -EIOCBQUEUED) if (ret == -EIOCBQUEUED)
ret = 0; ret = 0;
if (ret) { if (ret) {
erofs_err(sb, "failed to fscache_read (ret %d)", ret); erofs_err(NULL, "fscache_read failed (ret %d)", ret);
return ret; return ret;
} }
if (WARN_ON(iov_iter_count(iter)))
return -EIO;
done += slen; iov_iter_reexpand(iter, orig_count - len);
pstart += len;
} }
DBG_BUGON(done != len);
return 0; return 0;
} }
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio) struct erofs_fscache_bio {
struct erofs_fscache_io io;
struct bio bio; /* w/o bdev to share bio_add_page/endio() */
struct bio_vec bvecs[BIO_MAX_VECS];
};
static void erofs_fscache_bio_endio(void *priv,
ssize_t transferred_or_error, bool was_async)
{
struct erofs_fscache_bio *io = priv;
if (IS_ERR_VALUE(transferred_or_error))
io->bio.bi_status = errno_to_blk_status(transferred_or_error);
io->bio.bi_end_io(&io->bio);
BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
erofs_fscache_io_put(&io->io);
}
struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
{
struct erofs_fscache_bio *io;
io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
io->io.private = mdev->m_fscache->cookie;
io->io.end_io = erofs_fscache_bio_endio;
refcount_set(&io->io.ref, 1);
return &io->bio;
}
void erofs_fscache_submit_bio(struct bio *bio)
{ {
struct erofs_fscache_bio *io = container_of(bio,
struct erofs_fscache_bio, bio);
int ret; int ret;
iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt,
bio->bi_iter.bi_size);
ret = erofs_fscache_read_io_async(io->io.private,
bio->bi_iter.bi_sector << 9, &io->io);
erofs_fscache_io_put(&io->io);
if (!ret)
return;
bio->bi_status = errno_to_blk_status(ret);
bio->bi_end_io(bio);
}
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
struct erofs_fscache *ctx = folio->mapping->host->i_private; struct erofs_fscache *ctx = folio->mapping->host->i_private;
struct erofs_fscache_request *req; int ret = -ENOMEM;
struct erofs_fscache_rq *req;
struct erofs_fscache_io *io;
req = erofs_fscache_req_alloc(folio->mapping, req = erofs_fscache_req_alloc(folio->mapping,
folio_pos(folio), folio_size(folio)); folio_pos(folio), folio_size(folio));
if (IS_ERR(req)) { if (!req) {
folio_unlock(folio); folio_unlock(folio);
return PTR_ERR(req); return ret;
} }
ret = erofs_fscache_read_folios_async(ctx->cookie, req, io = erofs_fscache_req_io_alloc(req);
if (!io) {
req->error = ret;
goto out;
}
iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages,
folio_pos(folio), folio_size(folio)); folio_pos(folio), folio_size(folio));
ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io);
if (ret) if (ret)
req->error = ret; req->error = ret;
erofs_fscache_req_io_put(io);
out:
erofs_fscache_req_put(req); erofs_fscache_req_put(req);
return ret; return ret;
} }
static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
{ {
struct address_space *mapping = primary->mapping; struct address_space *mapping = req->mapping;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct erofs_fscache_request *req; struct erofs_fscache_io *io;
struct erofs_map_blocks map; struct erofs_map_blocks map;
struct erofs_map_dev mdev; struct erofs_map_dev mdev;
struct iov_iter iter; loff_t pos = req->start + req->submitted;
loff_t pos = primary->start + primary->submitted;
size_t count; size_t count;
int ret; int ret;
...@@ -204,6 +272,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) ...@@ -204,6 +272,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
if (map.m_flags & EROFS_MAP_META) { if (map.m_flags & EROFS_MAP_META) {
struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct iov_iter iter;
erofs_blk_t blknr; erofs_blk_t blknr;
size_t offset, size; size_t offset, size;
void *src; void *src;
...@@ -224,15 +293,17 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) ...@@ -224,15 +293,17 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
} }
iov_iter_zero(PAGE_SIZE - size, &iter); iov_iter_zero(PAGE_SIZE - size, &iter);
erofs_put_metabuf(&buf); erofs_put_metabuf(&buf);
primary->submitted += PAGE_SIZE; req->submitted += PAGE_SIZE;
return 0; return 0;
} }
count = primary->len - primary->submitted; count = req->len - req->submitted;
if (!(map.m_flags & EROFS_MAP_MAPPED)) { if (!(map.m_flags & EROFS_MAP_MAPPED)) {
struct iov_iter iter;
iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count); iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
iov_iter_zero(count, &iter); iov_iter_zero(count, &iter);
primary->submitted += count; req->submitted += count;
return 0; return 0;
} }
...@@ -247,18 +318,19 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) ...@@ -247,18 +318,19 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
if (ret) if (ret)
return ret; return ret;
req = erofs_fscache_req_chain(primary, count); io = erofs_fscache_req_io_alloc(req);
if (IS_ERR(req)) if (!io)
return PTR_ERR(req); return -ENOMEM;
iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
mdev.m_pa + (pos - map.m_la), io);
erofs_fscache_req_io_put(io);
ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie, req->submitted += count;
req, mdev.m_pa + (pos - map.m_la), count);
erofs_fscache_req_put(req);
primary->submitted += count;
return ret; return ret;
} }
static int erofs_fscache_data_read(struct erofs_fscache_request *req) static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
{ {
int ret; int ret;
...@@ -267,20 +339,19 @@ static int erofs_fscache_data_read(struct erofs_fscache_request *req) ...@@ -267,20 +339,19 @@ static int erofs_fscache_data_read(struct erofs_fscache_request *req)
if (ret) if (ret)
req->error = ret; req->error = ret;
} while (!ret && req->submitted < req->len); } while (!ret && req->submitted < req->len);
return ret; return ret;
} }
static int erofs_fscache_read_folio(struct file *file, struct folio *folio) static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{ {
struct erofs_fscache_request *req; struct erofs_fscache_rq *req;
int ret; int ret;
req = erofs_fscache_req_alloc(folio->mapping, req = erofs_fscache_req_alloc(folio->mapping,
folio_pos(folio), folio_size(folio)); folio_pos(folio), folio_size(folio));
if (IS_ERR(req)) { if (!req) {
folio_unlock(folio); folio_unlock(folio);
return PTR_ERR(req); return -ENOMEM;
} }
ret = erofs_fscache_data_read(req); ret = erofs_fscache_data_read(req);
...@@ -290,14 +361,14 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio) ...@@ -290,14 +361,14 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
static void erofs_fscache_readahead(struct readahead_control *rac) static void erofs_fscache_readahead(struct readahead_control *rac)
{ {
struct erofs_fscache_request *req; struct erofs_fscache_rq *req;
if (!readahead_count(rac)) if (!readahead_count(rac))
return; return;
req = erofs_fscache_req_alloc(rac->mapping, req = erofs_fscache_req_alloc(rac->mapping,
readahead_pos(rac), readahead_length(rac)); readahead_pos(rac), readahead_length(rac));
if (IS_ERR(req)) if (!req)
return; return;
/* The request completion will drop refs on the folios. */ /* The request completion will drop refs on the folios. */
...@@ -381,7 +452,7 @@ static int erofs_fscache_init_domain(struct super_block *sb) ...@@ -381,7 +452,7 @@ static int erofs_fscache_init_domain(struct super_block *sb)
goto out; goto out;
if (!erofs_pseudo_mnt) { if (!erofs_pseudo_mnt) {
struct vfsmount *mnt = kern_mount(&erofs_fs_type); struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
if (IS_ERR(mnt)) { if (IS_ERR(mnt)) {
err = PTR_ERR(mnt); err = PTR_ERR(mnt);
goto out; goto out;
......
...@@ -259,14 +259,12 @@ static int erofs_fill_inode(struct inode *inode) ...@@ -259,14 +259,12 @@ static int erofs_fill_inode(struct inode *inode)
if (erofs_inode_is_data_compressed(vi->datalayout)) { if (erofs_inode_is_data_compressed(vi->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP #ifdef CONFIG_EROFS_FS_ZIP
if (!erofs_is_fscache_mode(inode->i_sb)) { DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT,
DO_ONCE_LITE_IF(inode->i_sb->s_blocksize != PAGE_SIZE,
erofs_info, inode->i_sb, erofs_info, inode->i_sb,
"EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!");
inode->i_mapping->a_ops = &z_erofs_aops; inode->i_mapping->a_ops = &z_erofs_aops;
err = 0; err = 0;
goto out_unlock; goto out_unlock;
}
#endif #endif
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
goto out_unlock; goto out_unlock;
......
...@@ -385,7 +385,6 @@ struct erofs_map_dev { ...@@ -385,7 +385,6 @@ struct erofs_map_dev {
unsigned int m_deviceid; unsigned int m_deviceid;
}; };
extern struct file_system_type erofs_fs_type;
extern const struct super_operations erofs_sops; extern const struct super_operations erofs_sops;
extern const struct address_space_operations erofs_raw_access_aops; extern const struct address_space_operations erofs_raw_access_aops;
...@@ -467,7 +466,7 @@ int __init erofs_init_shrinker(void); ...@@ -467,7 +466,7 @@ int __init erofs_init_shrinker(void);
void erofs_exit_shrinker(void); void erofs_exit_shrinker(void);
int __init z_erofs_init_zip_subsystem(void); int __init z_erofs_init_zip_subsystem(void);
void z_erofs_exit_zip_subsystem(void); void z_erofs_exit_zip_subsystem(void);
int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
struct erofs_workgroup *egrp); struct erofs_workgroup *egrp);
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
int flags); int flags);
...@@ -513,6 +512,8 @@ void erofs_fscache_unregister_fs(struct super_block *sb); ...@@ -513,6 +512,8 @@ void erofs_fscache_unregister_fs(struct super_block *sb);
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
char *name, unsigned int flags); char *name, unsigned int flags);
void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache); void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache);
struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev);
void erofs_fscache_submit_bio(struct bio *bio);
#else #else
static inline int erofs_fscache_register_fs(struct super_block *sb) static inline int erofs_fscache_register_fs(struct super_block *sb)
{ {
...@@ -530,6 +531,8 @@ struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, ...@@ -530,6 +531,8 @@ struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache) static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache)
{ {
} }
static inline struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) { return NULL; }
static inline void erofs_fscache_submit_bio(struct bio *bio) {}
#endif #endif
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
......
...@@ -579,13 +579,6 @@ static const struct export_operations erofs_export_ops = { ...@@ -579,13 +579,6 @@ static const struct export_operations erofs_export_ops = {
.get_parent = erofs_get_parent, .get_parent = erofs_get_parent,
}; };
static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc)
{
static const struct tree_descr empty_descr = {""};
return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr);
}
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{ {
struct inode *inode; struct inode *inode;
...@@ -712,11 +705,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) ...@@ -712,11 +705,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return 0; return 0;
} }
static int erofs_fc_anon_get_tree(struct fs_context *fc)
{
return get_tree_nodev(fc, erofs_fc_fill_pseudo_super);
}
static int erofs_fc_get_tree(struct fs_context *fc) static int erofs_fc_get_tree(struct fs_context *fc)
{ {
struct erofs_fs_context *ctx = fc->fs_private; struct erofs_fs_context *ctx = fc->fs_private;
...@@ -789,20 +777,10 @@ static const struct fs_context_operations erofs_context_ops = { ...@@ -789,20 +777,10 @@ static const struct fs_context_operations erofs_context_ops = {
.free = erofs_fc_free, .free = erofs_fc_free,
}; };
static const struct fs_context_operations erofs_anon_context_ops = {
.get_tree = erofs_fc_anon_get_tree,
};
static int erofs_init_fs_context(struct fs_context *fc) static int erofs_init_fs_context(struct fs_context *fc)
{ {
struct erofs_fs_context *ctx; struct erofs_fs_context *ctx;
/* pseudo mount for anon inodes */
if (fc->sb_flags & SB_KERNMOUNT) {
fc->ops = &erofs_anon_context_ops;
return 0;
}
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx) if (!ctx)
return -ENOMEM; return -ENOMEM;
...@@ -824,12 +802,6 @@ static void erofs_kill_sb(struct super_block *sb) ...@@ -824,12 +802,6 @@ static void erofs_kill_sb(struct super_block *sb)
{ {
struct erofs_sb_info *sbi; struct erofs_sb_info *sbi;
/* pseudo mount for anon inodes */
if (sb->s_flags & SB_KERNMOUNT) {
kill_anon_super(sb);
return;
}
if (erofs_is_fscache_mode(sb)) if (erofs_is_fscache_mode(sb))
kill_anon_super(sb); kill_anon_super(sb);
else else
...@@ -868,7 +840,7 @@ static void erofs_put_super(struct super_block *sb) ...@@ -868,7 +840,7 @@ static void erofs_put_super(struct super_block *sb)
erofs_fscache_unregister_fs(sb); erofs_fscache_unregister_fs(sb);
} }
struct file_system_type erofs_fs_type = { static struct file_system_type erofs_fs_type = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.name = "erofs", .name = "erofs",
.init_fs_context = erofs_init_fs_context, .init_fs_context = erofs_init_fs_context,
......
...@@ -129,7 +129,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, ...@@ -129,7 +129,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
* the XArray. Otherwise some cached pages could be still attached to * the XArray. Otherwise some cached pages could be still attached to
* the orphan old workgroup when the new one is available in the tree. * the orphan old workgroup when the new one is available in the tree.
*/ */
if (erofs_try_to_free_all_cached_pages(sbi, grp)) if (erofs_try_to_free_all_cached_folios(sbi, grp))
goto out; goto out;
/* /*
......
...@@ -19,7 +19,10 @@ ...@@ -19,7 +19,10 @@
typedef void *z_erofs_next_pcluster_t; typedef void *z_erofs_next_pcluster_t;
struct z_erofs_bvec { struct z_erofs_bvec {
union {
struct page *page; struct page *page;
struct folio *folio;
};
int offset; int offset;
unsigned int end; unsigned int end;
}; };
...@@ -116,47 +119,46 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) ...@@ -116,47 +119,46 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT; return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT;
} }
#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo)
{
return fo->mapping == MNGD_MAPPING(sbi);
}
/* /*
* bit 30: I/O error occurred on this page * bit 30: I/O error occurred on this folio
* bit 0 - 29: remaining parts to complete this page * bit 0 - 29: remaining parts to complete this folio
*/ */
#define Z_EROFS_PAGE_EIO (1 << 30) #define Z_EROFS_FOLIO_EIO (1 << 30)
static inline void z_erofs_onlinepage_init(struct page *page) static void z_erofs_onlinefolio_init(struct folio *folio)
{ {
union { union {
atomic_t o; atomic_t o;
unsigned long v; void *v;
} u = { .o = ATOMIC_INIT(1) }; } u = { .o = ATOMIC_INIT(1) };
set_page_private(page, u.v); folio->private = u.v; /* valid only if file-backed folio is locked */
smp_wmb();
SetPagePrivate(page);
} }
static inline void z_erofs_onlinepage_split(struct page *page) static void z_erofs_onlinefolio_split(struct folio *folio)
{ {
atomic_inc((atomic_t *)&page->private); atomic_inc((atomic_t *)&folio->private);
} }
static void z_erofs_onlinepage_endio(struct page *page, int err) static void z_erofs_onlinefolio_end(struct folio *folio, int err)
{ {
int orig, v; int orig, v;
DBG_BUGON(!PagePrivate(page));
do { do {
orig = atomic_read((atomic_t *)&page->private); orig = atomic_read((atomic_t *)&folio->private);
v = (orig - 1) | (err ? Z_EROFS_PAGE_EIO : 0); v = (orig - 1) | (err ? Z_EROFS_FOLIO_EIO : 0);
} while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig); } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
if (!(v & ~Z_EROFS_PAGE_EIO)) { if (v & ~Z_EROFS_FOLIO_EIO)
set_page_private(page, 0); return;
ClearPagePrivate(page); folio->private = 0;
if (!(v & Z_EROFS_PAGE_EIO)) folio_end_read(folio, !(v & Z_EROFS_FOLIO_EIO));
SetPageUptodate(page);
unlock_page(page);
}
} }
#define Z_EROFS_ONSTACK_PAGES 32 #define Z_EROFS_ONSTACK_PAGES 32
...@@ -572,17 +574,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) ...@@ -572,17 +574,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
for (i = 0; i < pclusterpages; ++i) { for (i = 0; i < pclusterpages; ++i) {
struct page *page, *newpage; struct page *page, *newpage;
void *t; /* mark pages just found for debugging */
/* Inaccurate check w/o locking to avoid unneeded lookups */ /* Inaccurate check w/o locking to avoid unneeded lookups */
if (READ_ONCE(pcl->compressed_bvecs[i].page)) if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue; continue;
page = find_get_page(mc, pcl->obj.index + i); page = find_get_page(mc, pcl->obj.index + i);
if (page) { if (!page) {
t = (void *)((unsigned long)page | 1);
newpage = NULL;
} else {
/* I/O is needed, not possible to decompress directly */ /* I/O is needed, not possible to decompress directly */
standalone = false; standalone = false;
if (!shouldalloc) if (!shouldalloc)
...@@ -596,11 +594,10 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) ...@@ -596,11 +594,10 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
if (!newpage) if (!newpage)
continue; continue;
set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
t = (void *)((unsigned long)newpage | 1);
} }
spin_lock(&pcl->obj.lockref.lock); spin_lock(&pcl->obj.lockref.lock);
if (!pcl->compressed_bvecs[i].page) { if (!pcl->compressed_bvecs[i].page) {
pcl->compressed_bvecs[i].page = t; pcl->compressed_bvecs[i].page = page ? page : newpage;
spin_unlock(&pcl->obj.lockref.lock); spin_unlock(&pcl->obj.lockref.lock);
continue; continue;
} }
...@@ -620,8 +617,8 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) ...@@ -620,8 +617,8 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
} }
/* called by erofs_shrinker to get rid of all compressed_pages */ /* called by erofs_shrinker to get rid of all cached compressed bvecs */
int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
struct erofs_workgroup *grp) struct erofs_workgroup *grp)
{ {
struct z_erofs_pcluster *const pcl = struct z_erofs_pcluster *const pcl =
...@@ -630,27 +627,22 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, ...@@ -630,27 +627,22 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
int i; int i;
DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
/* /* There is no active user since the pcluster is now frozen */
* refcount of workgroup is now freezed as 0,
* therefore no need to worry about available decompression users.
*/
for (i = 0; i < pclusterpages; ++i) { for (i = 0; i < pclusterpages; ++i) {
struct page *page = pcl->compressed_bvecs[i].page; struct folio *folio = pcl->compressed_bvecs[i].folio;
if (!page) if (!folio)
continue; continue;
/* block other users from reclaiming or migrating the page */ /* Avoid reclaiming or migrating this folio */
if (!trylock_page(page)) if (!folio_trylock(folio))
return -EBUSY; return -EBUSY;
if (!erofs_page_is_managed(sbi, page)) if (!erofs_folio_is_managed(sbi, folio))
continue; continue;
pcl->compressed_bvecs[i].folio = NULL;
/* barrier is implied in the following 'unlock_page' */ folio_detach_private(folio);
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); folio_unlock(folio);
detach_page_private(page);
unlock_page(page);
} }
return 0; return 0;
} }
...@@ -667,20 +659,17 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp) ...@@ -667,20 +659,17 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
ret = false; ret = false;
spin_lock(&pcl->obj.lockref.lock); spin_lock(&pcl->obj.lockref.lock);
if (pcl->obj.lockref.count > 0) if (pcl->obj.lockref.count <= 0) {
goto out;
DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
for (i = 0; i < pclusterpages; ++i) { for (i = 0; i < pclusterpages; ++i) {
if (pcl->compressed_bvecs[i].page == &folio->page) { if (pcl->compressed_bvecs[i].folio == folio) {
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); pcl->compressed_bvecs[i].folio = NULL;
folio_detach_private(folio);
ret = true; ret = true;
break; break;
} }
} }
if (ret) }
folio_detach_private(folio);
out:
spin_unlock(&pcl->obj.lockref.lock); spin_unlock(&pcl->obj.lockref.lock);
return ret; return ret;
} }
...@@ -962,20 +951,20 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page, ...@@ -962,20 +951,20 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
return 0; return 0;
} }
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *fe,
struct page *page, bool ra) struct folio *folio, bool ra)
{ {
struct inode *const inode = fe->inode; struct inode *const inode = fe->inode;
struct erofs_map_blocks *const map = &fe->map; struct erofs_map_blocks *const map = &fe->map;
const loff_t offset = page_offset(page); const loff_t offset = folio_pos(folio);
const unsigned int bs = i_blocksize(inode); const unsigned int bs = i_blocksize(inode), fs = folio_size(folio);
bool tight = true, exclusive; bool tight = true, exclusive;
unsigned int cur, end, len, split; unsigned int cur, end, len, split;
int err = 0; int err = 0;
z_erofs_onlinepage_init(page); z_erofs_onlinefolio_init(folio);
split = 0; split = 0;
end = PAGE_SIZE; end = fs;
repeat: repeat:
if (offset + end - 1 < map->m_la || if (offset + end - 1 < map->m_la ||
offset + end - 1 >= map->m_la + map->m_llen) { offset + end - 1 >= map->m_la + map->m_llen) {
...@@ -992,7 +981,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, ...@@ -992,7 +981,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
++split; ++split;
if (!(map->m_flags & EROFS_MAP_MAPPED)) { if (!(map->m_flags & EROFS_MAP_MAPPED)) {
zero_user_segment(page, cur, end); folio_zero_segment(folio, cur, end);
tight = false; tight = false;
goto next_part; goto next_part;
} }
...@@ -1001,8 +990,8 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, ...@@ -1001,8 +990,8 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
erofs_off_t fpos = offset + cur - map->m_la; erofs_off_t fpos = offset + cur - map->m_la;
len = min_t(unsigned int, map->m_llen - fpos, end - cur); len = min_t(unsigned int, map->m_llen - fpos, end - cur);
err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len, err = z_erofs_read_fragment(inode->i_sb, &folio->page, cur,
EROFS_I(inode)->z_fragmentoff + fpos); cur + len, EROFS_I(inode)->z_fragmentoff + fpos);
if (err) if (err)
goto out; goto out;
tight = false; tight = false;
...@@ -1017,25 +1006,25 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, ...@@ -1017,25 +1006,25 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
} }
/* /*
* Ensure the current partial page belongs to this submit chain rather * Ensure the current partial folio belongs to this submit chain rather
* than other concurrent submit chains or the noio(bypass) chain since * than other concurrent submit chains or the noio(bypass) chain since
* those chains are handled asynchronously thus the page cannot be used * those chains are handled asynchronously thus the folio cannot be used
* for inplace I/O or bvpage (should be processed in a strict order.) * for inplace I/O or bvpage (should be processed in a strict order.)
*/ */
tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
exclusive = (!cur && ((split <= 1) || (tight && bs == PAGE_SIZE))); exclusive = (!cur && ((split <= 1) || (tight && bs == fs)));
if (cur) if (cur)
tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED); tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) { err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) {
.page = page, .page = &folio->page,
.offset = offset - map->m_la, .offset = offset - map->m_la,
.end = end, .end = end,
}), exclusive); }), exclusive);
if (err) if (err)
goto out; goto out;
z_erofs_onlinepage_split(page); z_erofs_onlinefolio_split(folio);
if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
fe->pcl->multibases = true; fe->pcl->multibases = true;
if (fe->pcl->length < offset + end - map->m_la) { if (fe->pcl->length < offset + end - map->m_la) {
...@@ -1056,7 +1045,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, ...@@ -1056,7 +1045,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
goto repeat; goto repeat;
out: out:
z_erofs_onlinepage_endio(page, err); z_erofs_onlinefolio_end(folio, err);
return err; return err;
} }
...@@ -1159,7 +1148,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be, ...@@ -1159,7 +1148,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
cur += len; cur += len;
} }
kunmap_local(dst); kunmap_local(dst);
z_erofs_onlinepage_endio(bvi->bvec.page, err); z_erofs_onlinefolio_end(page_folio(bvi->bvec.page), err);
list_del(p); list_del(p);
kfree(bvi); kfree(bvi);
} }
...@@ -1210,7 +1199,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be, ...@@ -1210,7 +1199,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
be->compressed_pages[i] = page; be->compressed_pages[i] = page;
if (z_erofs_is_inline_pcluster(pcl) || if (z_erofs_is_inline_pcluster(pcl) ||
erofs_page_is_managed(EROFS_SB(be->sb), page)) { erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) {
if (!PageUptodate(page)) if (!PageUptodate(page))
err = -EIO; err = -EIO;
continue; continue;
...@@ -1295,7 +1284,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, ...@@ -1295,7 +1284,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
/* consider shortlived pages added when decompressing */ /* consider shortlived pages added when decompressing */
page = be->compressed_pages[i]; page = be->compressed_pages[i];
if (!page || erofs_page_is_managed(sbi, page)) if (!page ||
erofs_folio_is_managed(sbi, page_folio(page)))
continue; continue;
(void)z_erofs_put_shortlivedpage(be->pagepool, page); (void)z_erofs_put_shortlivedpage(be->pagepool, page);
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
...@@ -1316,7 +1306,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, ...@@ -1316,7 +1306,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
/* recycle all individual short-lived pages */ /* recycle all individual short-lived pages */
if (z_erofs_put_shortlivedpage(be->pagepool, page)) if (z_erofs_put_shortlivedpage(be->pagepool, page))
continue; continue;
z_erofs_onlinepage_endio(page, err); z_erofs_onlinefolio_end(page_folio(page), err);
} }
if (be->decompressed_pages != be->onstack_pages) if (be->decompressed_pages != be->onstack_pages)
...@@ -1430,38 +1420,34 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, ...@@ -1430,38 +1420,34 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
struct z_erofs_bvec zbv; struct z_erofs_bvec zbv;
struct address_space *mapping; struct address_space *mapping;
struct page *page; struct page *page;
int justfound, bs = i_blocksize(f->inode); int bs = i_blocksize(f->inode);
/* Except for inplace pages, the entire page can be used for I/Os */ /* Except for inplace folios, the entire folio can be used for I/Os */
bvec->bv_offset = 0; bvec->bv_offset = 0;
bvec->bv_len = PAGE_SIZE; bvec->bv_len = PAGE_SIZE;
repeat: repeat:
spin_lock(&pcl->obj.lockref.lock); spin_lock(&pcl->obj.lockref.lock);
zbv = pcl->compressed_bvecs[nr]; zbv = pcl->compressed_bvecs[nr];
page = zbv.page;
justfound = (unsigned long)page & 1UL;
page = (struct page *)((unsigned long)page & ~1UL);
pcl->compressed_bvecs[nr].page = page;
spin_unlock(&pcl->obj.lockref.lock); spin_unlock(&pcl->obj.lockref.lock);
if (!page) if (!zbv.folio)
goto out_allocpage; goto out_allocfolio;
bvec->bv_page = page; bvec->bv_page = &zbv.folio->page;
DBG_BUGON(z_erofs_is_shortlived_page(page)); DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page));
/* /*
* Handle preallocated cached pages. We tried to allocate such pages * Handle preallocated cached folios. We tried to allocate such folios
* without triggering direct reclaim. If allocation failed, inplace * without triggering direct reclaim. If allocation failed, inplace
* file-backed pages will be used instead. * file-backed folios will be used instead.
*/ */
if (page->private == Z_EROFS_PREALLOCATED_PAGE) { if (zbv.folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
set_page_private(page, 0); zbv.folio->private = 0;
tocache = true; tocache = true;
goto out_tocache; goto out_tocache;
} }
mapping = READ_ONCE(page->mapping); mapping = READ_ONCE(zbv.folio->mapping);
/* /*
* File-backed pages for inplace I/Os are all locked steady, * File-backed folios for inplace I/Os are all locked steady,
* therefore it is impossible for `mapping` to be NULL. * therefore it is impossible for `mapping` to be NULL.
*/ */
if (mapping && mapping != mc) { if (mapping && mapping != mc) {
...@@ -1471,26 +1457,21 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, ...@@ -1471,26 +1457,21 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
return; return;
} }
lock_page(page); folio_lock(zbv.folio);
/* only true if page reclaim goes wrong, should never happen */ if (zbv.folio->mapping == mc) {
DBG_BUGON(justfound && PagePrivate(page));
/* the cached page is still in managed cache */
if (page->mapping == mc) {
/* /*
* The cached page is still available but without a valid * The cached folio is still in managed cache but without
* `->private` pcluster hint. Let's reconnect them. * a valid `->private` pcluster hint. Let's reconnect them.
*/ */
if (!PagePrivate(page)) { if (!folio_test_private(zbv.folio)) {
DBG_BUGON(!justfound); folio_attach_private(zbv.folio, pcl);
/* compressed_bvecs[] already takes a ref */ /* compressed_bvecs[] already takes a ref before */
attach_page_private(page, pcl); folio_put(zbv.folio);
put_page(page);
} }
/* no need to submit if it is already up-to-date */ /* no need to submit if it is already up-to-date */
if (PageUptodate(page)) { if (folio_test_uptodate(zbv.folio)) {
unlock_page(page); folio_unlock(zbv.folio);
bvec->bv_page = NULL; bvec->bv_page = NULL;
} }
return; return;
...@@ -1500,34 +1481,32 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, ...@@ -1500,34 +1481,32 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
* It has been truncated, so it's unsafe to reuse this one. Let's * It has been truncated, so it's unsafe to reuse this one. Let's
* allocate a new page for compressed data. * allocate a new page for compressed data.
*/ */
DBG_BUGON(page->mapping); DBG_BUGON(zbv.folio->mapping);
DBG_BUGON(!justfound);
tocache = true; tocache = true;
unlock_page(page); folio_unlock(zbv.folio);
put_page(page); folio_put(zbv.folio);
out_allocpage: out_allocfolio:
page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
spin_lock(&pcl->obj.lockref.lock); spin_lock(&pcl->obj.lockref.lock);
if (pcl->compressed_bvecs[nr].page) { if (pcl->compressed_bvecs[nr].folio) {
erofs_pagepool_add(&f->pagepool, page); erofs_pagepool_add(&f->pagepool, page);
spin_unlock(&pcl->obj.lockref.lock); spin_unlock(&pcl->obj.lockref.lock);
cond_resched(); cond_resched();
goto repeat; goto repeat;
} }
pcl->compressed_bvecs[nr].page = page; pcl->compressed_bvecs[nr].folio = zbv.folio = page_folio(page);
spin_unlock(&pcl->obj.lockref.lock); spin_unlock(&pcl->obj.lockref.lock);
bvec->bv_page = page; bvec->bv_page = page;
out_tocache: out_tocache:
if (!tocache || bs != PAGE_SIZE || if (!tocache || bs != PAGE_SIZE ||
add_to_page_cache_lru(page, mc, pcl->obj.index + nr, gfp)) { filemap_add_folio(mc, zbv.folio, pcl->obj.index + nr, gfp)) {
/* turn into a temporary shortlived page (1 ref) */ /* turn into a temporary shortlived folio (1 ref) */
set_page_private(page, Z_EROFS_SHORTLIVED_PAGE); zbv.folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
return; return;
} }
attach_page_private(page, pcl); folio_attach_private(zbv.folio, pcl);
/* drop a refcount added by allocpage (then 2 refs in total here) */ /* drop a refcount added by allocpage (then 2 refs in total here) */
put_page(page); folio_put(zbv.folio);
} }
static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb, static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,
...@@ -1582,27 +1561,28 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, ...@@ -1582,27 +1561,28 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
qtail[JQ_BYPASS] = &pcl->next; qtail[JQ_BYPASS] = &pcl->next;
} }
static void z_erofs_submissionqueue_endio(struct bio *bio) static void z_erofs_endio(struct bio *bio)
{ {
struct z_erofs_decompressqueue *q = bio->bi_private; struct z_erofs_decompressqueue *q = bio->bi_private;
blk_status_t err = bio->bi_status; blk_status_t err = bio->bi_status;
struct bio_vec *bvec; struct folio_iter fi;
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bvec, bio, iter_all) { bio_for_each_folio_all(fi, bio) {
struct page *page = bvec->bv_page; struct folio *folio = fi.folio;
DBG_BUGON(folio_test_uptodate(folio));
DBG_BUGON(z_erofs_page_is_invalidated(&folio->page));
if (!erofs_folio_is_managed(EROFS_SB(q->sb), folio))
continue;
DBG_BUGON(PageUptodate(page));
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
if (!err) if (!err)
SetPageUptodate(page); folio_mark_uptodate(folio);
unlock_page(page); folio_unlock(folio);
}
} }
if (err) if (err)
q->eio = true; q->eio = true;
z_erofs_decompress_kickoff(q, -1); z_erofs_decompress_kickoff(q, -1);
if (bio->bi_bdev)
bio_put(bio); bio_put(bio);
} }
...@@ -1617,7 +1597,6 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, ...@@ -1617,7 +1597,6 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
z_erofs_next_pcluster_t owned_head = f->owned_head; z_erofs_next_pcluster_t owned_head = f->owned_head;
/* bio is NULL initially, so no need to initialize last_{index,bdev} */ /* bio is NULL initially, so no need to initialize last_{index,bdev} */
erofs_off_t last_pa; erofs_off_t last_pa;
struct block_device *last_bdev;
unsigned int nr_bios = 0; unsigned int nr_bios = 0;
struct bio *bio = NULL; struct bio *bio = NULL;
unsigned long pflags; unsigned long pflags;
...@@ -1664,9 +1643,13 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, ...@@ -1664,9 +1643,13 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
continue; continue;
if (bio && (cur != last_pa || if (bio && (cur != last_pa ||
last_bdev != mdev.m_bdev)) { bio->bi_bdev != mdev.m_bdev)) {
submit_bio_retry: io_retry:
if (!erofs_is_fscache_mode(sb))
submit_bio(bio); submit_bio(bio);
else
erofs_fscache_submit_bio(bio);
if (memstall) { if (memstall) {
psi_memstall_leave(&pflags); psi_memstall_leave(&pflags);
memstall = 0; memstall = 0;
...@@ -1681,15 +1664,16 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, ...@@ -1681,15 +1664,16 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
} }
if (!bio) { if (!bio) {
bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS, bio = erofs_is_fscache_mode(sb) ?
erofs_fscache_bio_alloc(&mdev) :
bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
REQ_OP_READ, GFP_NOIO); REQ_OP_READ, GFP_NOIO);
bio->bi_end_io = z_erofs_submissionqueue_endio; bio->bi_end_io = z_erofs_endio;
bio->bi_iter.bi_sector = cur >> 9; bio->bi_iter.bi_sector = cur >> 9;
bio->bi_private = q[JQ_SUBMIT]; bio->bi_private = q[JQ_SUBMIT];
if (readahead) if (readahead)
bio->bi_opf |= REQ_RAHEAD; bio->bi_opf |= REQ_RAHEAD;
++nr_bios; ++nr_bios;
last_bdev = mdev.m_bdev;
} }
if (cur + bvec.bv_len > end) if (cur + bvec.bv_len > end)
...@@ -1697,7 +1681,7 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, ...@@ -1697,7 +1681,7 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
DBG_BUGON(bvec.bv_len < sb->s_blocksize); DBG_BUGON(bvec.bv_len < sb->s_blocksize);
if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len, if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
bvec.bv_offset)) bvec.bv_offset))
goto submit_bio_retry; goto io_retry;
last_pa = cur + bvec.bv_len; last_pa = cur + bvec.bv_len;
bypass = false; bypass = false;
...@@ -1710,7 +1694,10 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, ...@@ -1710,7 +1694,10 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
} while (owned_head != Z_EROFS_PCLUSTER_TAIL); } while (owned_head != Z_EROFS_PCLUSTER_TAIL);
if (bio) { if (bio) {
if (!erofs_is_fscache_mode(sb))
submit_bio(bio); submit_bio(bio);
else
erofs_fscache_submit_bio(bio);
if (memstall) if (memstall)
psi_memstall_leave(&pflags); psi_memstall_leave(&pflags);
} }
...@@ -1795,7 +1782,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, ...@@ -1795,7 +1782,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
if (PageUptodate(page)) if (PageUptodate(page))
unlock_page(page); unlock_page(page);
else else
(void)z_erofs_do_read_page(f, page, !!rac); z_erofs_scan_folio(f, page_folio(page), !!rac);
put_page(page); put_page(page);
} }
...@@ -1816,7 +1803,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio) ...@@ -1816,7 +1803,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
z_erofs_pcluster_readmore(&f, NULL, true); z_erofs_pcluster_readmore(&f, NULL, true);
err = z_erofs_do_read_page(&f, &folio->page, false); err = z_erofs_scan_folio(&f, folio, false);
z_erofs_pcluster_readmore(&f, NULL, false); z_erofs_pcluster_readmore(&f, NULL, false);
z_erofs_pcluster_end(&f); z_erofs_pcluster_end(&f);
...@@ -1857,7 +1844,7 @@ static void z_erofs_readahead(struct readahead_control *rac) ...@@ -1857,7 +1844,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
folio = head; folio = head;
head = folio_get_private(folio); head = folio_get_private(folio);
err = z_erofs_do_read_page(&f, &folio->page, true); err = z_erofs_scan_folio(&f, folio, true);
if (err && err != -EINTR) if (err && err != -EINTR)
erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu", erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
folio->index, EROFS_I(inode)->nid); folio->index, EROFS_I(inode)->nid);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment