Commit 69a3a0a4 authored by Linus Torvalds

Merge tag 'erofs-for-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "In this cycle, we add file-backed mount support, which has been a
  strong requirement for years. It is especially useful when there are
  thousands of images running on the same host for containers and other
  sandbox use cases, unlike OS image use cases.

  Without file-backed mounts, it's hard for container runtimes to manage
  and isolate so many unnecessary virtual block devices safely and
  efficiently, therefore file-backed mounts are highly preferred. For
  EROFS users, ComposeFS [1], containerd, and Android APEXes [2] will
  directly benefit from it, and I've seen no risk in implementing it as
  a completely immutable filesystem.

  The previous experimental feature "EROFS over fscache" is now marked
  as deprecated because:

   - Fscache is no longer an independent subsystem and has been merged
     into netfs, which was somewhat unexpected when it was proposed.

   - New HSM "fanotify pre-content hooks" [3] will be landed upstream.
     These hooks will replace "EROFS over fscache" in a simpler way, as
     EROFS won't be bothered with kernel caching anymore. Userspace
     programs can also manage their own caching hierarchy more flexibly.

  Once the HSM "fanotify pre-content hooks" is landed, I will remove the
  fscache backend entirely as an internal dependency cleanup. More
  backgrounds are listed in the original patchset [4].

  In addition to that, there are bugfixes and cleanups as usual.

  Summary:

   - Support file-backed mounts for containers and sandboxes

   - Mark the experimental fscache backend as deprecated

   - Handle overlapped pclusters caused by crafted images properly

   - Fix a failure path which could cause infinite loops in
     z_erofs_init_decompressor()

   - Get rid of unnecessary NOFAILs

   - Harmless on-disk hardening & minor cleanups"

* tag 'erofs-for-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: reject inodes with negative i_size
  erofs: restrict pcluster size limitations
  erofs: allocate more short-lived pages from reserved pool first
  erofs: sunset unneeded NOFAILs
  erofs: simplify erofs_map_blocks_flatmode()
  erofs: refactor read_inode calling convention
  erofs: use kmemdup_nul in erofs_fill_symlink
  erofs: mark experimental fscache backend deprecated
  erofs: support compressed inodes for fileio
  erofs: support unencoded inodes for fileio
  erofs: add file-backed mount support
  erofs: handle overlapped pclusters out of crafted images properly
  erofs: fix error handling in z_erofs_init_decompressor
  erofs: clean up erofs_register_sysfs()
  erofs: fix incorrect symlink detection in fast symlink
parents 7a40974f 025497e1
...@@ -74,6 +74,23 @@ config EROFS_FS_SECURITY ...@@ -74,6 +74,23 @@ config EROFS_FS_SECURITY
If you are not using a security module, say N. If you are not using a security module, say N.
config EROFS_FS_BACKED_BY_FILE
bool "File-backed EROFS filesystem support"
depends on EROFS_FS
default y
help
This allows EROFS to use filesystem image files directly, without
the intercession of loopback block devices or likewise. It is
particularly useful for container images with numerous blobs and
other sandboxes, where loop devices behave intricately. It can also
be used to simplify error-prone lifetime management of unnecessary
virtual block devices.
Note that this feature, along with ongoing fanotify pre-content
hooks, will eventually replace "EROFS over fscache."
If you don't want to enable this feature, say N.
config EROFS_FS_ZIP config EROFS_FS_ZIP
bool "EROFS Data Compression Support" bool "EROFS Data Compression Support"
depends on EROFS_FS depends on EROFS_FS
...@@ -128,7 +145,7 @@ config EROFS_FS_ZIP_ZSTD ...@@ -128,7 +145,7 @@ config EROFS_FS_ZIP_ZSTD
If unsure, say N. If unsure, say N.
config EROFS_FS_ONDEMAND config EROFS_FS_ONDEMAND
bool "EROFS fscache-based on-demand read support" bool "EROFS fscache-based on-demand read support (deprecated)"
depends on EROFS_FS depends on EROFS_FS
select NETFS_SUPPORT select NETFS_SUPPORT
select FSCACHE select FSCACHE
...@@ -138,6 +155,9 @@ config EROFS_FS_ONDEMAND ...@@ -138,6 +155,9 @@ config EROFS_FS_ONDEMAND
This permits EROFS to use fscache-backed data blobs with on-demand This permits EROFS to use fscache-backed data blobs with on-demand
read support. read support.
It is now deprecated and scheduled to be removed from the kernel
after fanotify pre-content hooks are landed.
If unsure, say N. If unsure, say N.
config EROFS_FS_PCPU_KTHREAD config EROFS_FS_PCPU_KTHREAD
......
...@@ -7,4 +7,5 @@ erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o ...@@ -7,4 +7,5 @@ erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
...@@ -59,8 +59,12 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, ...@@ -59,8 +59,12 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset,
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb) void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
{ {
if (erofs_is_fscache_mode(sb)) struct erofs_sb_info *sbi = EROFS_SB(sb);
buf->mapping = EROFS_SB(sb)->s_fscache->inode->i_mapping;
if (erofs_is_fileio_mode(sbi))
buf->mapping = file_inode(sbi->fdev)->i_mapping;
else if (erofs_is_fscache_mode(sb))
buf->mapping = sbi->s_fscache->inode->i_mapping;
else else
buf->mapping = sb->s_bdev->bd_mapping; buf->mapping = sb->s_bdev->bd_mapping;
} }
...@@ -75,38 +79,28 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, ...@@ -75,38 +79,28 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
static int erofs_map_blocks_flatmode(struct inode *inode, static int erofs_map_blocks_flatmode(struct inode *inode,
struct erofs_map_blocks *map) struct erofs_map_blocks *map)
{ {
erofs_blk_t nblocks, lastblk;
u64 offset = map->m_la;
struct erofs_inode *vi = EROFS_I(inode); struct erofs_inode *vi = EROFS_I(inode);
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking;
nblocks = erofs_iblks(inode); map->m_flags = EROFS_MAP_MAPPED; /* no hole in flat inodes */
lastblk = nblocks - tailendpacking; if (map->m_la < erofs_pos(sb, lastblk)) {
/* there is no hole in flatmode */
map->m_flags = EROFS_MAP_MAPPED;
if (offset < erofs_pos(sb, lastblk)) {
map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la; map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la;
map->m_plen = erofs_pos(sb, lastblk) - offset; map->m_plen = erofs_pos(sb, lastblk) - map->m_la;
} else if (tailendpacking) { } else {
DBG_BUGON(!tailendpacking);
map->m_pa = erofs_iloc(inode) + vi->inode_isize + map->m_pa = erofs_iloc(inode) + vi->inode_isize +
vi->xattr_isize + erofs_blkoff(sb, offset); vi->xattr_isize + erofs_blkoff(sb, map->m_la);
map->m_plen = inode->i_size - offset; map->m_plen = inode->i_size - map->m_la;
/* inline data should be located in the same meta block */ /* inline data should be located in the same meta block */
if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) { if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
erofs_err(sb, "inline data cross block boundary @ nid %llu", erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid);
vi->nid);
DBG_BUGON(1); DBG_BUGON(1);
return -EFSCORRUPTED; return -EFSCORRUPTED;
} }
map->m_flags |= EROFS_MAP_META; map->m_flags |= EROFS_MAP_META;
} else {
erofs_err(sb, "internal error @ nid: %llu (size %llu), m_la 0x%llx",
vi->nid, inode->i_size, map->m_la);
DBG_BUGON(1);
return -EIO;
} }
return 0; return 0;
} }
...@@ -128,7 +122,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) ...@@ -128,7 +122,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
if (map->m_la >= inode->i_size) { if (map->m_la >= inode->i_size) {
/* leave out-of-bound access unmapped */ /* leave out-of-bound access unmapped */
map->m_flags = 0; map->m_flags = 0;
map->m_plen = 0; map->m_plen = map->m_llen;
goto out; goto out;
} }
...@@ -189,16 +183,34 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) ...@@ -189,16 +183,34 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
return err; return err;
} }
/*
 * Populate an I/O target map from a per-device descriptor.
 *
 * An extra device may be backed either by a block device or by a regular
 * file (file-backed mounts): if dif->file refers to a block device, route
 * I/O through the bdev; otherwise keep the file handle for the fileio path.
 * At most one of m_bdev / m_fp is set on return.
 */
static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
				    struct erofs_device_info *dif)
{
	map->m_bdev = NULL;
	map->m_fp = NULL;
	if (dif->file) {
		if (S_ISBLK(file_inode(dif->file)->i_mode))
			map->m_bdev = file_bdev(dif->file);
		else
			map->m_fp = dif->file;
	}
	map->m_daxdev = dif->dax_dev;
	map->m_dax_part_off = dif->dax_part_off;
	map->m_fscache = dif->fscache;
}
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{ {
struct erofs_dev_context *devs = EROFS_SB(sb)->devs; struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
struct erofs_device_info *dif; struct erofs_device_info *dif;
erofs_off_t startoff, length;
int id; int id;
map->m_bdev = sb->s_bdev; map->m_bdev = sb->s_bdev;
map->m_daxdev = EROFS_SB(sb)->dax_dev; map->m_daxdev = EROFS_SB(sb)->dax_dev;
map->m_dax_part_off = EROFS_SB(sb)->dax_part_off; map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
map->m_fscache = EROFS_SB(sb)->s_fscache; map->m_fscache = EROFS_SB(sb)->s_fscache;
map->m_fp = EROFS_SB(sb)->fdev;
if (map->m_deviceid) { if (map->m_deviceid) {
down_read(&devs->rwsem); down_read(&devs->rwsem);
...@@ -212,29 +224,20 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) ...@@ -212,29 +224,20 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
up_read(&devs->rwsem); up_read(&devs->rwsem);
return 0; return 0;
} }
map->m_bdev = dif->bdev_file ? file_bdev(dif->bdev_file) : NULL; erofs_fill_from_devinfo(map, dif);
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
map->m_fscache = dif->fscache;
up_read(&devs->rwsem); up_read(&devs->rwsem);
} else if (devs->extra_devices && !devs->flatdev) { } else if (devs->extra_devices && !devs->flatdev) {
down_read(&devs->rwsem); down_read(&devs->rwsem);
idr_for_each_entry(&devs->tree, dif, id) { idr_for_each_entry(&devs->tree, dif, id) {
erofs_off_t startoff, length;
if (!dif->mapped_blkaddr) if (!dif->mapped_blkaddr)
continue; continue;
startoff = erofs_pos(sb, dif->mapped_blkaddr); startoff = erofs_pos(sb, dif->mapped_blkaddr);
length = erofs_pos(sb, dif->blocks); length = erofs_pos(sb, dif->blocks);
if (map->m_pa >= startoff && if (map->m_pa >= startoff &&
map->m_pa < startoff + length) { map->m_pa < startoff + length) {
map->m_pa -= startoff; map->m_pa -= startoff;
map->m_bdev = dif->bdev_file ? erofs_fill_from_devinfo(map, dif);
file_bdev(dif->bdev_file) : NULL;
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
map->m_fscache = dif->fscache;
break; break;
} }
} }
...@@ -243,6 +246,42 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) ...@@ -243,6 +246,42 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
return 0; return 0;
} }
/*
* bit 30: I/O error occurred on this folio
* bit 0 - 29: remaining parts to complete this folio
*/
#define EROFS_ONLINEFOLIO_EIO (1 << 30)
/*
 * Initialize the per-folio completion counter kept in folio->private.
 * The count starts at 1: this initial reference belongs to the submitter
 * and is dropped by its final erofs_onlinefolio_end() call, so the folio
 * cannot complete while parts are still being attached.
 */
void erofs_onlinefolio_init(struct folio *folio)
{
	union {
		atomic_t o;
		void *v;
	} u = { .o = ATOMIC_INIT(1) };

	folio->private = u.v;	/* valid only if file-backed folio is locked */
}
/* Account one more in-flight part (sub-request) of this folio. */
void erofs_onlinefolio_split(struct folio *folio)
{
	atomic_inc((atomic_t *)&folio->private);
}
/*
 * Complete one part of the folio: atomically drop a reference and, on
 * error, sticky-set the EROFS_ONLINEFOLIO_EIO bit via a lock-free
 * cmpxchg loop.  When the last reference goes away, clear ->private and
 * finish the read, marking the folio uptodate only if no part failed.
 */
void erofs_onlinefolio_end(struct folio *folio, int err)
{
	int orig, v;

	do {
		orig = atomic_read((atomic_t *)&folio->private);
		v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
	} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);

	if (v & ~EROFS_ONLINEFOLIO_EIO)
		return;		/* other parts are still pending */
	folio->private = 0;
	folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
}
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned int flags, struct iomap *iomap, struct iomap *srcmap) unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{ {
...@@ -392,7 +431,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ...@@ -392,7 +431,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
} }
/* for uncompressed (aligned) files and raw access for other files */ /* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_raw_access_aops = { const struct address_space_operations erofs_aops = {
.read_folio = erofs_read_folio, .read_folio = erofs_read_folio,
.readahead = erofs_readahead, .readahead = erofs_readahead,
.bmap = erofs_bmap, .bmap = erofs_bmap,
......
...@@ -539,7 +539,7 @@ int __init z_erofs_init_decompressor(void) ...@@ -539,7 +539,7 @@ int __init z_erofs_init_decompressor(void)
for (i = 0; i < Z_EROFS_COMPRESSION_MAX; ++i) { for (i = 0; i < Z_EROFS_COMPRESSION_MAX; ++i) {
err = z_erofs_decomp[i] ? z_erofs_decomp[i]->init() : 0; err = z_erofs_decomp[i] ? z_erofs_decomp[i]->init() : 0;
if (err) { if (err) {
while (--i) while (i--)
if (z_erofs_decomp[i]) if (z_erofs_decomp[i])
z_erofs_decomp[i]->exit(); z_erofs_decomp[i]->exit();
return err; return err;
......
...@@ -288,9 +288,12 @@ struct erofs_dirent { ...@@ -288,9 +288,12 @@ struct erofs_dirent {
#define EROFS_NAME_LEN 255 #define EROFS_NAME_LEN 255
/* maximum supported size of a physical compression cluster */ /* maximum supported encoded size of a physical compressed cluster */
#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024) #define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024)
/* maximum supported decoded size of a physical compressed cluster */
#define Z_EROFS_PCLUSTER_MAX_DSIZE (12 * 1024 * 1024)
/* available compression algorithm types (for h_algorithmtype) */ /* available compression algorithm types (for h_algorithmtype) */
enum { enum {
Z_EROFS_COMPRESSION_LZ4 = 0, Z_EROFS_COMPRESSION_LZ4 = 0,
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2024, Alibaba Cloud
*/
#include "internal.h"
#include <trace/events/erofs.h>
/* One in-flight file-backed read request: a bio fed to a kiocb read. */
struct erofs_fileio_rq {
	struct bio_vec bvecs[BIO_MAX_VECS];	/* shared by the bio and the iov_iter */
	struct bio bio;				/* collects target folios/segments */
	struct kiocb iocb;			/* positional read on the backing file */
};

/* Per-scan context carried across folios of one read/readahead pass. */
struct erofs_fileio {
	struct erofs_map_blocks map;	/* cached logical extent mapping */
	struct erofs_map_dev dev;	/* resolved backing target of that extent */
	struct erofs_fileio_rq *rq;	/* request being assembled, or NULL */
};
/*
 * Completion callback of the positional read (also called directly for
 * synchronous returns).
 *
 * On a short read (ret < bi_size) the unread tail of the bio is zero-filled;
 * any positive byte count is then treated as overall success (ret = 0).
 * If the submitter installed a bi_end_io handler, completion is delegated
 * to it; otherwise every attached folio is finished here.  The request is
 * torn down in either case.
 */
static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
{
	struct erofs_fileio_rq *rq =
			container_of(iocb, struct erofs_fileio_rq, iocb);
	struct folio_iter fi;

	if (ret > 0) {
		if (ret != rq->bio.bi_iter.bi_size) {
			bio_advance(&rq->bio, ret);
			zero_fill_bio(&rq->bio);
		}
		ret = 0;
	}
	if (rq->bio.bi_end_io) {
		rq->bio.bi_end_io(&rq->bio);
	} else {
		bio_for_each_folio_all(fi, &rq->bio) {
			DBG_BUGON(folio_test_uptodate(fi.folio));
			erofs_onlinefolio_end(fi.folio, ret);
		}
	}
	bio_uninit(&rq->bio);
	kfree(rq);
}
/*
 * Submit an assembled request: read the collected bio_vecs from the backing
 * file at the position encoded in the bio's start sector.  Direct I/O is
 * requested when the file supports it.  Any synchronous result (everything
 * except -EIOCBQUEUED) completes the request immediately; NULL rq is a no-op
 * so callers may pass an unused context unconditionally.
 */
static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
{
	struct iov_iter iter;
	int ret;

	if (!rq)
		return;
	rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << SECTOR_SHIFT;
	rq->iocb.ki_ioprio = get_current_ioprio();
	rq->iocb.ki_complete = erofs_fileio_ki_complete;
	rq->iocb.ki_flags = (rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ?
				IOCB_DIRECT : 0;
	iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt,
		      rq->bio.bi_iter.bi_size);
	ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter);
	if (ret != -EIOCBQUEUED)
		erofs_fileio_ki_complete(&rq->iocb, ret);
}
/*
 * Allocate a request targeting mdev's backing file.  __GFP_NOFAIL makes the
 * allocation infallible, so callers never need a NULL check.
 */
static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
{
	struct erofs_fileio_rq *rq = kzalloc(sizeof(*rq),
					     GFP_KERNEL | __GFP_NOFAIL);

	bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
	rq->iocb.ki_filp = mdev->m_fp;
	return rq;
}
struct bio *erofs_fileio_bio_alloc(struct erofs_map_dev *mdev)
{
return &erofs_fileio_rq_alloc(mdev)->bio;
}
/* Submit a bio previously obtained from erofs_fileio_bio_alloc(). */
void erofs_fileio_submit_bio(struct bio *bio)
{
	struct erofs_fileio_rq *rq =
			container_of(bio, struct erofs_fileio_rq, bio);

	erofs_fileio_rq_submit(rq);
}
/*
 * Fill one folio of an unencoded inode by walking its logical extents.
 *
 * For each sub-range of the folio: inline (tail-packed) metadata is copied
 * out of the metabuf, unmapped ranges (holes) are zero-filled, and mapped
 * extents are batched into io->rq, which is flushed whenever the next chunk
 * is not physically contiguous with it or targets a different device.
 * Returns 0 or a negative errno; already-batched I/O may be in flight either
 * way, and the folio is completed via the onlinefolio reference counting.
 */
static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
{
	struct inode *inode = folio_inode(folio);
	struct erofs_map_blocks *map = &io->map;
	unsigned int cur = 0, end = folio_size(folio), len, attached = 0;
	loff_t pos = folio_pos(folio), ofs;
	struct iov_iter iter;
	struct bio_vec bv;
	int err = 0;

	erofs_onlinefolio_init(folio);
	while (cur < end) {
		/* refresh the cached mapping if the position fell outside it */
		if (!in_range(pos + cur, map->m_la, map->m_llen)) {
			map->m_la = pos + cur;
			map->m_llen = end - cur;
			err = erofs_map_blocks(inode, map);
			if (err)
				break;
		}

		ofs = folio_pos(folio) + cur - map->m_la;	/* offset inside the extent */
		len = min_t(loff_t, map->m_llen - ofs, end - cur);
		if (map->m_flags & EROFS_MAP_META) {
			struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
			void *src;

			/* inline data lives in metadata: copy from the metabuf */
			src = erofs_read_metabuf(&buf, inode->i_sb,
						 map->m_pa + ofs, EROFS_KMAP);
			if (IS_ERR(src)) {
				err = PTR_ERR(src);
				break;
			}
			bvec_set_folio(&bv, folio, len, cur);
			iov_iter_bvec(&iter, ITER_DEST, &bv, 1, len);
			if (copy_to_iter(src, len, &iter) != len) {
				erofs_put_metabuf(&buf);
				err = -EIO;
				break;
			}
			erofs_put_metabuf(&buf);
		} else if (!(map->m_flags & EROFS_MAP_MAPPED)) {
			/* hole: no backing data, just zero it */
			folio_zero_segment(folio, cur, cur + len);
			attached = 0;
		} else {
			/* flush the pending request on any discontinuity */
			if (io->rq && (map->m_pa + ofs != io->dev.m_pa ||
				       map->m_deviceid != io->dev.m_deviceid)) {
io_retry:
				erofs_fileio_rq_submit(io->rq);
				io->rq = NULL;
			}

			if (!io->rq) {
				/* start a new request at this physical address */
				io->dev = (struct erofs_map_dev) {
					.m_pa = io->map.m_pa + ofs,
					.m_deviceid = io->map.m_deviceid,
				};
				err = erofs_map_dev(inode->i_sb, &io->dev);
				if (err)
					break;
				io->rq = erofs_fileio_rq_alloc(&io->dev);
				io->rq->bio.bi_iter.bi_sector = io->dev.m_pa >> 9;
				attached = 0;
			}
			/* take one folio reference per request it joins */
			if (!attached++)
				erofs_onlinefolio_split(folio);
			if (!bio_add_folio(&io->rq->bio, folio, len, cur))
				goto io_retry;	/* bio full: submit, retry chunk */
			io->dev.m_pa += len;
		}
		cur += len;
	}

	erofs_onlinefolio_end(folio, err);	/* drop the submitter's reference */
	return err;
}
/* ->read_folio() for file-backed mounts: scan one folio, then flush. */
static int erofs_fileio_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fileio io = {};
	int err;

	trace_erofs_read_folio(folio, true);
	err = erofs_fileio_scan_folio(&io, folio);
	erofs_fileio_rq_submit(io.rq);	/* flush any request left open */
	return err;
}
/*
 * ->readahead() for file-backed mounts: one shared context lets requests
 * span consecutive folios; errors are logged (readahead is best-effort)
 * and the last open request is flushed at the end.
 */
static void erofs_fileio_readahead(struct readahead_control *rac)
{
	struct inode *inode = rac->mapping->host;
	struct erofs_fileio io = {};
	struct folio *folio;
	int err;

	trace_erofs_readpages(inode, readahead_index(rac),
			      readahead_count(rac), true);
	while ((folio = readahead_folio(rac))) {
		err = erofs_fileio_scan_folio(&io, folio);
		if (err && err != -EINTR)
			erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
				  folio->index, EROFS_I(inode)->nid);
	}
	erofs_fileio_rq_submit(io.rq);
}
/* address_space ops for unencoded inodes on file-backed mounts */
const struct address_space_operations erofs_fileio_aops = {
	.read_folio = erofs_fileio_read_folio,
	.readahead = erofs_fileio_readahead,
};
...@@ -5,11 +5,26 @@ ...@@ -5,11 +5,26 @@
* Copyright (C) 2021, Alibaba Cloud * Copyright (C) 2021, Alibaba Cloud
*/ */
#include "xattr.h" #include "xattr.h"
#include <trace/events/erofs.h> #include <trace/events/erofs.h>
static void *erofs_read_inode(struct erofs_buf *buf, static int erofs_fill_symlink(struct inode *inode, void *kaddr,
struct inode *inode, unsigned int *ofs) unsigned int m_pofs)
{
struct erofs_inode *vi = EROFS_I(inode);
loff_t off;
m_pofs += vi->xattr_isize;
/* check if it cannot be handled with fast symlink scheme */
if (vi->datalayout != EROFS_INODE_FLAT_INLINE ||
check_add_overflow(m_pofs, inode->i_size, &off) ||
off > i_blocksize(inode))
return 0;
inode->i_link = kmemdup_nul(kaddr + m_pofs, inode->i_size, GFP_KERNEL);
return inode->i_link ? 0 : -ENOMEM;
}
static int erofs_read_inode(struct inode *inode)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_sb_info *sbi = EROFS_SB(sb);
...@@ -20,20 +35,21 @@ static void *erofs_read_inode(struct erofs_buf *buf, ...@@ -20,20 +35,21 @@ static void *erofs_read_inode(struct erofs_buf *buf,
struct erofs_inode_compact *dic; struct erofs_inode_compact *dic;
struct erofs_inode_extended *die, *copied = NULL; struct erofs_inode_extended *die, *copied = NULL;
union erofs_inode_i_u iu; union erofs_inode_i_u iu;
unsigned int ifmt; struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
int err; unsigned int ifmt, ofs;
int err = 0;
blkaddr = erofs_blknr(sb, inode_loc); blkaddr = erofs_blknr(sb, inode_loc);
*ofs = erofs_blkoff(sb, inode_loc); ofs = erofs_blkoff(sb, inode_loc);
kaddr = erofs_read_metabuf(buf, sb, erofs_pos(sb, blkaddr), EROFS_KMAP); kaddr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), EROFS_KMAP);
if (IS_ERR(kaddr)) { if (IS_ERR(kaddr)) {
erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld", erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld",
vi->nid, PTR_ERR(kaddr)); vi->nid, PTR_ERR(kaddr));
return kaddr; return PTR_ERR(kaddr);
} }
dic = kaddr + *ofs; dic = kaddr + ofs;
ifmt = le16_to_cpu(dic->i_format); ifmt = le16_to_cpu(dic->i_format);
if (ifmt & ~EROFS_I_ALL) { if (ifmt & ~EROFS_I_ALL) {
erofs_err(sb, "unsupported i_format %u of nid %llu", erofs_err(sb, "unsupported i_format %u of nid %llu",
...@@ -54,11 +70,11 @@ static void *erofs_read_inode(struct erofs_buf *buf, ...@@ -54,11 +70,11 @@ static void *erofs_read_inode(struct erofs_buf *buf,
case EROFS_INODE_LAYOUT_EXTENDED: case EROFS_INODE_LAYOUT_EXTENDED:
vi->inode_isize = sizeof(struct erofs_inode_extended); vi->inode_isize = sizeof(struct erofs_inode_extended);
/* check if the extended inode acrosses block boundary */ /* check if the extended inode acrosses block boundary */
if (*ofs + vi->inode_isize <= sb->s_blocksize) { if (ofs + vi->inode_isize <= sb->s_blocksize) {
*ofs += vi->inode_isize; ofs += vi->inode_isize;
die = (struct erofs_inode_extended *)dic; die = (struct erofs_inode_extended *)dic;
} else { } else {
const unsigned int gotten = sb->s_blocksize - *ofs; const unsigned int gotten = sb->s_blocksize - ofs;
copied = kmalloc(vi->inode_isize, GFP_KERNEL); copied = kmalloc(vi->inode_isize, GFP_KERNEL);
if (!copied) { if (!copied) {
...@@ -66,16 +82,16 @@ static void *erofs_read_inode(struct erofs_buf *buf, ...@@ -66,16 +82,16 @@ static void *erofs_read_inode(struct erofs_buf *buf,
goto err_out; goto err_out;
} }
memcpy(copied, dic, gotten); memcpy(copied, dic, gotten);
kaddr = erofs_read_metabuf(buf, sb, erofs_pos(sb, blkaddr + 1), kaddr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr + 1),
EROFS_KMAP); EROFS_KMAP);
if (IS_ERR(kaddr)) { if (IS_ERR(kaddr)) {
erofs_err(sb, "failed to get inode payload block (nid: %llu), err %ld", erofs_err(sb, "failed to get inode payload block (nid: %llu), err %ld",
vi->nid, PTR_ERR(kaddr)); vi->nid, PTR_ERR(kaddr));
kfree(copied); kfree(copied);
return kaddr; return PTR_ERR(kaddr);
} }
*ofs = vi->inode_isize - gotten; ofs = vi->inode_isize - gotten;
memcpy((u8 *)copied + gotten, kaddr, *ofs); memcpy((u8 *)copied + gotten, kaddr, ofs);
die = copied; die = copied;
} }
vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
...@@ -91,11 +107,10 @@ static void *erofs_read_inode(struct erofs_buf *buf, ...@@ -91,11 +107,10 @@ static void *erofs_read_inode(struct erofs_buf *buf,
inode->i_size = le64_to_cpu(die->i_size); inode->i_size = le64_to_cpu(die->i_size);
kfree(copied); kfree(copied);
copied = NULL;
break; break;
case EROFS_INODE_LAYOUT_COMPACT: case EROFS_INODE_LAYOUT_COMPACT:
vi->inode_isize = sizeof(struct erofs_inode_compact); vi->inode_isize = sizeof(struct erofs_inode_compact);
*ofs += vi->inode_isize; ofs += vi->inode_isize;
vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount); vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
inode->i_mode = le16_to_cpu(dic->i_mode); inode->i_mode = le16_to_cpu(dic->i_mode);
...@@ -115,11 +130,21 @@ static void *erofs_read_inode(struct erofs_buf *buf, ...@@ -115,11 +130,21 @@ static void *erofs_read_inode(struct erofs_buf *buf,
goto err_out; goto err_out;
} }
if (unlikely(inode->i_size < 0)) {
erofs_err(sb, "negative i_size @ nid %llu", vi->nid);
err = -EFSCORRUPTED;
goto err_out;
}
switch (inode->i_mode & S_IFMT) { switch (inode->i_mode & S_IFMT) {
case S_IFREG: case S_IFREG:
case S_IFDIR: case S_IFDIR:
case S_IFLNK: case S_IFLNK:
vi->raw_blkaddr = le32_to_cpu(iu.raw_blkaddr); vi->raw_blkaddr = le32_to_cpu(iu.raw_blkaddr);
if(S_ISLNK(inode->i_mode)) {
err = erofs_fill_symlink(inode, kaddr, ofs);
if (err)
goto err_out;
}
break; break;
case S_IFCHR: case S_IFCHR:
case S_IFBLK: case S_IFBLK:
...@@ -165,65 +190,23 @@ static void *erofs_read_inode(struct erofs_buf *buf, ...@@ -165,65 +190,23 @@ static void *erofs_read_inode(struct erofs_buf *buf,
inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9; inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9;
else else
inode->i_blocks = nblks << (sb->s_blocksize_bits - 9); inode->i_blocks = nblks << (sb->s_blocksize_bits - 9);
return kaddr;
err_out: err_out:
DBG_BUGON(1); DBG_BUGON(err);
kfree(copied); erofs_put_metabuf(&buf);
erofs_put_metabuf(buf); return err;
return ERR_PTR(err);
}
static int erofs_fill_symlink(struct inode *inode, void *kaddr,
unsigned int m_pofs)
{
struct erofs_inode *vi = EROFS_I(inode);
unsigned int bsz = i_blocksize(inode);
char *lnk;
/* if it cannot be handled with fast symlink scheme */
if (vi->datalayout != EROFS_INODE_FLAT_INLINE ||
inode->i_size >= bsz || inode->i_size < 0) {
inode->i_op = &erofs_symlink_iops;
return 0;
}
lnk = kmalloc(inode->i_size + 1, GFP_KERNEL);
if (!lnk)
return -ENOMEM;
m_pofs += vi->xattr_isize;
/* inline symlink data shouldn't cross block boundary */
if (m_pofs + inode->i_size > bsz) {
kfree(lnk);
erofs_err(inode->i_sb,
"inline data cross block boundary @ nid %llu",
vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
memcpy(lnk, kaddr + m_pofs, inode->i_size);
lnk[inode->i_size] = '\0';
inode->i_link = lnk;
inode->i_op = &erofs_fast_symlink_iops;
return 0;
} }
static int erofs_fill_inode(struct inode *inode) static int erofs_fill_inode(struct inode *inode)
{ {
struct erofs_inode *vi = EROFS_I(inode); struct erofs_inode *vi = EROFS_I(inode);
struct erofs_buf buf = __EROFS_BUF_INITIALIZER; int err;
void *kaddr;
unsigned int ofs;
int err = 0;
trace_erofs_fill_inode(inode); trace_erofs_fill_inode(inode);
/* read inode base data from disk */ /* read inode base data from disk */
kaddr = erofs_read_inode(&buf, inode, &ofs); err = erofs_read_inode(inode);
if (IS_ERR(kaddr)) if (err)
return PTR_ERR(kaddr); return err;
/* setup the new inode */ /* setup the new inode */
switch (inode->i_mode & S_IFMT) { switch (inode->i_mode & S_IFMT) {
...@@ -240,9 +223,10 @@ static int erofs_fill_inode(struct inode *inode) ...@@ -240,9 +223,10 @@ static int erofs_fill_inode(struct inode *inode)
inode_nohighmem(inode); inode_nohighmem(inode);
break; break;
case S_IFLNK: case S_IFLNK:
err = erofs_fill_symlink(inode, kaddr, ofs); if (inode->i_link)
if (err) inode->i_op = &erofs_fast_symlink_iops;
goto out_unlock; else
inode->i_op = &erofs_symlink_iops;
inode_nohighmem(inode); inode_nohighmem(inode);
break; break;
case S_IFCHR: case S_IFCHR:
...@@ -251,10 +235,9 @@ static int erofs_fill_inode(struct inode *inode) ...@@ -251,10 +235,9 @@ static int erofs_fill_inode(struct inode *inode)
case S_IFSOCK: case S_IFSOCK:
inode->i_op = &erofs_generic_iops; inode->i_op = &erofs_generic_iops;
init_special_inode(inode, inode->i_mode, inode->i_rdev); init_special_inode(inode, inode->i_mode, inode->i_rdev);
goto out_unlock; return 0;
default: default:
err = -EFSCORRUPTED; return -EFSCORRUPTED;
goto out_unlock;
} }
mapping_set_large_folios(inode->i_mapping); mapping_set_large_folios(inode->i_mapping);
...@@ -268,14 +251,17 @@ static int erofs_fill_inode(struct inode *inode) ...@@ -268,14 +251,17 @@ static int erofs_fill_inode(struct inode *inode)
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
#endif #endif
} else { } else {
inode->i_mapping->a_ops = &erofs_raw_access_aops; inode->i_mapping->a_ops = &erofs_aops;
#ifdef CONFIG_EROFS_FS_ONDEMAND #ifdef CONFIG_EROFS_FS_ONDEMAND
if (erofs_is_fscache_mode(inode->i_sb)) if (erofs_is_fscache_mode(inode->i_sb))
inode->i_mapping->a_ops = &erofs_fscache_access_aops; inode->i_mapping->a_ops = &erofs_fscache_access_aops;
#endif
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
if (erofs_is_fileio_mode(EROFS_SB(inode->i_sb)))
inode->i_mapping->a_ops = &erofs_fileio_aops;
#endif #endif
} }
out_unlock:
erofs_put_metabuf(&buf);
return err; return err;
} }
......
...@@ -49,7 +49,7 @@ typedef u32 erofs_blk_t; ...@@ -49,7 +49,7 @@ typedef u32 erofs_blk_t;
struct erofs_device_info { struct erofs_device_info {
char *path; char *path;
struct erofs_fscache *fscache; struct erofs_fscache *fscache;
struct file *bdev_file; struct file *file;
struct dax_device *dax_dev; struct dax_device *dax_dev;
u64 dax_part_off; u64 dax_part_off;
...@@ -130,6 +130,7 @@ struct erofs_sb_info { ...@@ -130,6 +130,7 @@ struct erofs_sb_info {
struct erofs_sb_lz4_info lz4; struct erofs_sb_lz4_info lz4;
#endif /* CONFIG_EROFS_FS_ZIP */ #endif /* CONFIG_EROFS_FS_ZIP */
struct file *fdev;
struct inode *packed_inode; struct inode *packed_inode;
struct erofs_dev_context *devs; struct erofs_dev_context *devs;
struct dax_device *dax_dev; struct dax_device *dax_dev;
...@@ -190,9 +191,15 @@ struct erofs_sb_info { ...@@ -190,9 +191,15 @@ struct erofs_sb_info {
#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option) #define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option)
#define test_opt(opt, option) ((opt)->mount_opt & EROFS_MOUNT_##option) #define test_opt(opt, option) ((opt)->mount_opt & EROFS_MOUNT_##option)
/* true when the primary backing store is a regular file (sbi->fdev set) */
static inline bool erofs_is_fileio_mode(struct erofs_sb_info *sbi)
{
	return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->fdev;
}
static inline bool erofs_is_fscache_mode(struct super_block *sb) static inline bool erofs_is_fscache_mode(struct super_block *sb)
{ {
return IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && !sb->s_bdev; return IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) &&
!erofs_is_fileio_mode(EROFS_SB(sb)) && !sb->s_bdev;
} }
enum { enum {
...@@ -365,6 +372,7 @@ struct erofs_map_dev { ...@@ -365,6 +372,7 @@ struct erofs_map_dev {
struct erofs_fscache *m_fscache; struct erofs_fscache *m_fscache;
struct block_device *m_bdev; struct block_device *m_bdev;
struct dax_device *m_daxdev; struct dax_device *m_daxdev;
struct file *m_fp;
u64 m_dax_part_off; u64 m_dax_part_off;
erofs_off_t m_pa; erofs_off_t m_pa;
...@@ -373,7 +381,8 @@ struct erofs_map_dev { ...@@ -373,7 +381,8 @@ struct erofs_map_dev {
extern const struct super_operations erofs_sops; extern const struct super_operations erofs_sops;
extern const struct address_space_operations erofs_raw_access_aops; extern const struct address_space_operations erofs_aops;
extern const struct address_space_operations erofs_fileio_aops;
extern const struct address_space_operations z_erofs_aops; extern const struct address_space_operations z_erofs_aops;
extern const struct address_space_operations erofs_fscache_access_aops; extern const struct address_space_operations erofs_fscache_access_aops;
...@@ -404,6 +413,9 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); ...@@ -404,6 +413,9 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len); u64 start, u64 len);
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map); int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map);
void erofs_onlinefolio_init(struct folio *folio);
void erofs_onlinefolio_split(struct folio *folio);
void erofs_onlinefolio_end(struct folio *folio, int err);
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid); struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid);
int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, struct kstat *stat, u32 request_mask,
...@@ -477,6 +489,14 @@ static inline void z_erofs_exit_subsystem(void) {} ...@@ -477,6 +489,14 @@ static inline void z_erofs_exit_subsystem(void) {}
static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; } static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif /* !CONFIG_EROFS_FS_ZIP */ #endif /* !CONFIG_EROFS_FS_ZIP */
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
struct bio *erofs_fileio_bio_alloc(struct erofs_map_dev *mdev);
void erofs_fileio_submit_bio(struct bio *bio);
#else
static inline struct bio *erofs_fileio_bio_alloc(struct erofs_map_dev *mdev) { return NULL; }
static inline void erofs_fileio_submit_bio(struct bio *bio) {}
#endif
#ifdef CONFIG_EROFS_FS_ONDEMAND #ifdef CONFIG_EROFS_FS_ONDEMAND
int erofs_fscache_register_fs(struct super_block *sb); int erofs_fscache_register_fs(struct super_block *sb);
void erofs_fscache_unregister_fs(struct super_block *sb); void erofs_fscache_unregister_fs(struct super_block *sb);
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <linux/fs_context.h> #include <linux/fs_context.h>
#include <linux/fs_parser.h> #include <linux/fs_parser.h>
#include <linux/exportfs.h> #include <linux/exportfs.h>
#include <linux/backing-dev.h>
#include "xattr.h" #include "xattr.h"
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
...@@ -161,7 +162,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, ...@@ -161,7 +162,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_fscache *fscache; struct erofs_fscache *fscache;
struct erofs_deviceslot *dis; struct erofs_deviceslot *dis;
struct file *bdev_file; struct file *file;
dis = erofs_read_metabuf(buf, sb, *pos, EROFS_KMAP); dis = erofs_read_metabuf(buf, sb, *pos, EROFS_KMAP);
if (IS_ERR(dis)) if (IS_ERR(dis))
...@@ -183,13 +184,17 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, ...@@ -183,13 +184,17 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
return PTR_ERR(fscache); return PTR_ERR(fscache);
dif->fscache = fscache; dif->fscache = fscache;
} else if (!sbi->devs->flatdev) { } else if (!sbi->devs->flatdev) {
bdev_file = bdev_file_open_by_path(dif->path, BLK_OPEN_READ, file = erofs_is_fileio_mode(sbi) ?
sb->s_type, NULL); filp_open(dif->path, O_RDONLY | O_LARGEFILE, 0) :
if (IS_ERR(bdev_file)) bdev_file_open_by_path(dif->path,
return PTR_ERR(bdev_file); BLK_OPEN_READ, sb->s_type, NULL);
dif->bdev_file = bdev_file; if (IS_ERR(file))
dif->dax_dev = fs_dax_get_by_bdev(file_bdev(bdev_file), return PTR_ERR(file);
&dif->dax_part_off, NULL, NULL);
dif->file = file;
if (!erofs_is_fileio_mode(sbi))
dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file),
&dif->dax_part_off, NULL, NULL);
} }
dif->blocks = le32_to_cpu(dis->blocks); dif->blocks = le32_to_cpu(dis->blocks);
...@@ -348,7 +353,7 @@ static int erofs_read_superblock(struct super_block *sb) ...@@ -348,7 +353,7 @@ static int erofs_read_superblock(struct super_block *sb)
ret = erofs_scan_devices(sb, dsb); ret = erofs_scan_devices(sb, dsb);
if (erofs_is_fscache_mode(sb)) if (erofs_is_fscache_mode(sb))
erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!"); erofs_info(sb, "[deprecated] fscache-based on-demand read feature in use. Use at your own risk!");
out: out:
erofs_put_metabuf(&buf); erofs_put_metabuf(&buf);
return ret; return ret;
...@@ -566,15 +571,16 @@ static void erofs_set_sysfs_name(struct super_block *sb) ...@@ -566,15 +571,16 @@ static void erofs_set_sysfs_name(struct super_block *sb)
{ {
struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_sb_info *sbi = EROFS_SB(sb);
if (erofs_is_fscache_mode(sb)) { if (sbi->domain_id)
if (sbi->domain_id) super_set_sysfs_name_generic(sb, "%s,%s", sbi->domain_id,
super_set_sysfs_name_generic(sb, "%s,%s",sbi->domain_id, sbi->fsid);
sbi->fsid); else if (sbi->fsid)
else super_set_sysfs_name_generic(sb, "%s", sbi->fsid);
super_set_sysfs_name_generic(sb, "%s", sbi->fsid); else if (erofs_is_fileio_mode(sbi))
return; super_set_sysfs_name_generic(sb, "%s",
} bdi_dev_name(sb->s_bdi));
super_set_sysfs_name_id(sb); else
super_set_sysfs_name_id(sb);
} }
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
...@@ -589,14 +595,15 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) ...@@ -589,14 +595,15 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_op = &erofs_sops; sb->s_op = &erofs_sops;
sbi->blkszbits = PAGE_SHIFT; sbi->blkszbits = PAGE_SHIFT;
if (erofs_is_fscache_mode(sb)) { if (!sb->s_bdev) {
sb->s_blocksize = PAGE_SIZE; sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT; sb->s_blocksize_bits = PAGE_SHIFT;
err = erofs_fscache_register_fs(sb); if (erofs_is_fscache_mode(sb)) {
if (err) err = erofs_fscache_register_fs(sb);
return err; if (err)
return err;
}
err = super_setup_bdi(sb); err = super_setup_bdi(sb);
if (err) if (err)
return err; return err;
...@@ -644,7 +651,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) ...@@ -644,7 +651,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_flags |= SB_POSIXACL; sb->s_flags |= SB_POSIXACL;
else else
sb->s_flags &= ~SB_POSIXACL; sb->s_flags &= ~SB_POSIXACL;
erofs_set_sysfs_name(sb);
#ifdef CONFIG_EROFS_FS_ZIP #ifdef CONFIG_EROFS_FS_ZIP
xa_init(&sbi->managed_pslots); xa_init(&sbi->managed_pslots);
...@@ -682,6 +688,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) ...@@ -682,6 +688,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
if (err) if (err)
return err; return err;
erofs_set_sysfs_name(sb);
err = erofs_register_sysfs(sb); err = erofs_register_sysfs(sb);
if (err) if (err)
return err; return err;
...@@ -693,11 +700,24 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) ...@@ -693,11 +700,24 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
static int erofs_fc_get_tree(struct fs_context *fc) static int erofs_fc_get_tree(struct fs_context *fc)
{ {
struct erofs_sb_info *sbi = fc->s_fs_info; struct erofs_sb_info *sbi = fc->s_fs_info;
int ret;
if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid)
return get_tree_nodev(fc, erofs_fc_fill_super); return get_tree_nodev(fc, erofs_fc_fill_super);
return get_tree_bdev(fc, erofs_fc_fill_super); ret = get_tree_bdev(fc, erofs_fc_fill_super);
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
if (ret == -ENOTBLK) {
if (!fc->source)
return invalf(fc, "No source specified");
sbi->fdev = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
if (IS_ERR(sbi->fdev))
return PTR_ERR(sbi->fdev);
return get_tree_nodev(fc, erofs_fc_fill_super);
}
#endif
return ret;
} }
static int erofs_fc_reconfigure(struct fs_context *fc) static int erofs_fc_reconfigure(struct fs_context *fc)
...@@ -727,8 +747,8 @@ static int erofs_release_device_info(int id, void *ptr, void *data) ...@@ -727,8 +747,8 @@ static int erofs_release_device_info(int id, void *ptr, void *data)
struct erofs_device_info *dif = ptr; struct erofs_device_info *dif = ptr;
fs_put_dax(dif->dax_dev, NULL); fs_put_dax(dif->dax_dev, NULL);
if (dif->bdev_file) if (dif->file)
fput(dif->bdev_file); fput(dif->file);
erofs_fscache_unregister_cookie(dif->fscache); erofs_fscache_unregister_cookie(dif->fscache);
dif->fscache = NULL; dif->fscache = NULL;
kfree(dif->path); kfree(dif->path);
...@@ -791,7 +811,7 @@ static void erofs_kill_sb(struct super_block *sb) ...@@ -791,7 +811,7 @@ static void erofs_kill_sb(struct super_block *sb)
{ {
struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_sb_info *sbi = EROFS_SB(sb);
if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) || sbi->fdev)
kill_anon_super(sb); kill_anon_super(sb);
else else
kill_block_super(sb); kill_block_super(sb);
...@@ -801,6 +821,8 @@ static void erofs_kill_sb(struct super_block *sb) ...@@ -801,6 +821,8 @@ static void erofs_kill_sb(struct super_block *sb)
erofs_fscache_unregister_fs(sb); erofs_fscache_unregister_fs(sb);
kfree(sbi->fsid); kfree(sbi->fsid);
kfree(sbi->domain_id); kfree(sbi->domain_id);
if (sbi->fdev)
fput(sbi->fdev);
kfree(sbi); kfree(sbi);
sb->s_fs_info = NULL; sb->s_fs_info = NULL;
} }
...@@ -903,7 +925,7 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -903,7 +925,7 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = EROFS_NAME_LEN; buf->f_namelen = EROFS_NAME_LEN;
if (uuid_is_null(&sb->s_uuid)) if (uuid_is_null(&sb->s_uuid))
buf->f_fsid = u64_to_fsid(erofs_is_fscache_mode(sb) ? 0 : buf->f_fsid = u64_to_fsid(!sb->s_bdev ? 0 :
huge_encode_dev(sb->s_bdev->bd_dev)); huge_encode_dev(sb->s_bdev->bd_dev));
else else
buf->f_fsid = uuid_to_fsid(sb->s_uuid.b); buf->f_fsid = uuid_to_fsid(sb->s_uuid.b);
......
...@@ -205,34 +205,16 @@ static struct kobject erofs_feat = { ...@@ -205,34 +205,16 @@ static struct kobject erofs_feat = {
int erofs_register_sysfs(struct super_block *sb) int erofs_register_sysfs(struct super_block *sb)
{ {
struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_sb_info *sbi = EROFS_SB(sb);
char *name;
char *str = NULL;
int err; int err;
if (erofs_is_fscache_mode(sb)) {
if (sbi->domain_id) {
str = kasprintf(GFP_KERNEL, "%s,%s", sbi->domain_id,
sbi->fsid);
if (!str)
return -ENOMEM;
name = str;
} else {
name = sbi->fsid;
}
} else {
name = sb->s_id;
}
sbi->s_kobj.kset = &erofs_root; sbi->s_kobj.kset = &erofs_root;
init_completion(&sbi->s_kobj_unregister); init_completion(&sbi->s_kobj_unregister);
err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s", name); err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s",
kfree(str); sb->s_sysfs_name);
if (err) if (err) {
goto put_sb_kobj; kobject_put(&sbi->s_kobj);
return 0; wait_for_completion(&sbi->s_kobj_unregister);
}
put_sb_kobj:
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
return err; return err;
} }
......
This diff is collapsed.
...@@ -687,32 +687,30 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, ...@@ -687,32 +687,30 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
int err = 0; int err = 0;
trace_erofs_map_blocks_enter(inode, map, flags); trace_erofs_map_blocks_enter(inode, map, flags);
if (map->m_la >= inode->i_size) { /* post-EOF unmapped extent */
/* when trying to read beyond EOF, leave it unmapped */
if (map->m_la >= inode->i_size) {
map->m_llen = map->m_la + 1 - inode->i_size; map->m_llen = map->m_la + 1 - inode->i_size;
map->m_la = inode->i_size; map->m_la = inode->i_size;
map->m_flags = 0; map->m_flags = 0;
goto out; } else {
} err = z_erofs_fill_inode_lazy(inode);
if (!err) {
err = z_erofs_fill_inode_lazy(inode); if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
if (err) !vi->z_tailextent_headlcn) {
goto out; map->m_la = 0;
map->m_llen = inode->i_size;
if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) && map->m_flags = EROFS_MAP_MAPPED |
!vi->z_tailextent_headlcn) { EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT;
map->m_la = 0; } else {
map->m_llen = inode->i_size; err = z_erofs_do_map_blocks(inode, map, flags);
map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED | }
EROFS_MAP_FRAGMENT; }
goto out; if (!err && (map->m_flags & EROFS_MAP_ENCODED) &&
unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE ||
map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE))
err = -EOPNOTSUPP;
if (err)
map->m_llen = 0;
} }
err = z_erofs_do_map_blocks(inode, map, flags);
out:
if (err)
map->m_llen = 0;
trace_erofs_map_blocks_exit(inode, map, flags, err); trace_erofs_map_blocks_exit(inode, map, flags, err);
return err; return err;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment