Commit b5b3097d authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'erofs-for-5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "In this cycle, we would like to introduce a new feature called big
  pcluster so EROFS can compress file data into more than 1 fs block and
  different pcluster size can be selected for each (sub-)files by
  design.

  The current EROFS test results on my laptop are [1]:

    Testscript: erofs-openbenchmark
    Testdata: enwik9 (1000000000 bytes)
     ________________________________________________________________
    |  file system  |   size    | seq read | rand read | rand9m read |
    |_______________|___________|_ MiB/s __|__ MiB/s __|___ MiB/s ___|
    |___erofs_4k____|_556879872_|_ 781.4 __|__ 55.3 ___|___ 25.3  ___|
    |___erofs_16k___|_452509696_|_ 864.8 __|_ 123.2 ___|___ 20.8  ___|
    |___erofs_32k___|_415223808_|_ 899.8 __|_ 105.8 _*_|___ 16.8 ____|
    |___erofs_64k___|_393814016_|_ 906.6 __|__ 66.6 _*_|___ 11.8 ____|
    |__squashfs_8k__|_556191744_|_  64.9 __|__ 19.3 ___|____ 9.1 ____|
    |__squashfs_16k_|_502661120_|_  98.9 __|__ 38.0 ___|____ 9.8 ____|
    |__squashfs_32k_|_458784768_|_ 115.4 __|__ 71.6 _*_|___ 10.0 ____|
    |_squashfs_128k_|_398204928_|_ 257.2 __|_ 253.8 _*_|___ 10.9 ____|
    |____ext4_4k____|____()_____|_ 786.6 __|__ 28.6 ___|___ 27.8 ____|

  which has been verified but I'd like warn it as experimental for a
  while. This matches erofs-utils dev branch and I'll also release a new
  userspace version for this later.

  Apart from that, several improvements are also included: eg complete a
  missing case for inplace I/O, optimize endio decompression logic for
  non-atomic contexts and support adjustable sliding window size, ... In
  addition to those, there are some cleanups as always.

  Summary:

   - avoid memory failure when applying rolling decompression

   - optimize endio decompression logic for non-atomic contexts

   - complete a missing case which can be safely selected for inplace
     I/O and thus decreasing more memory footprint

   - check unsupported on-disk inode i_format strictly

   - support adjustable lz4 sliding window size to decrease runtime
     memory footprint

   - support on-disk compression configurations

   - support big pcluster decompression

   - several code cleanups / spelling correction"

* tag 'erofs-for-5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs: (21 commits)
  erofs: enable big pcluster feature
  erofs: support decompress big pcluster for lz4 backend
  erofs: support parsing big pcluster compact indexes
  erofs: support parsing big pcluster compress indexes
  erofs: adjust per-CPU buffers according to max_pclusterblks
  erofs: add big physical cluster definition
  erofs: fix up inplace I/O pointer for big pcluster
  erofs: introduce physical cluster slab pools
  erofs: introduce multipage per-CPU buffers
  erofs: reserve physical_clusterbits[]
  erofs: Clean up spelling mistakes found in fs/erofs
  erofs: add on-disk compression configurations
  erofs: introduce on-disk lz4 fs configurations
  erofs: support adjust lz4 history window size
  erofs: introduce erofs_sb_has_xxx() helpers
  erofs: add unsupported inode i_format check
  erofs: don't use erofs_map_blocks() any more
  erofs: complete a missing case for inplace I/O
  erofs: use sync decompression for atomic contexts only
  erofs: use workqueue decompression for atomic contexts only
  ...
parents befbfe07 8e6c8fa9
...@@ -76,17 +76,3 @@ config EROFS_FS_ZIP ...@@ -76,17 +76,3 @@ config EROFS_FS_ZIP
If you don't want to enable compression feature, say N. If you don't want to enable compression feature, say N.
config EROFS_FS_CLUSTER_PAGE_LIMIT
int "EROFS Cluster Pages Hard Limit"
depends on EROFS_FS_ZIP
range 1 256
default "1"
help
Indicates maximum # of pages of a compressed
physical cluster.
For example, if files in a image were compressed
into 8k-unit, hard limit should not be configured
less than 2. Otherwise, the image will be refused
to mount on this kernel.
# SPDX-License-Identifier: GPL-2.0-only # SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_EROFS_FS) += erofs.o obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
...@@ -109,21 +109,6 @@ static int erofs_map_blocks_flatmode(struct inode *inode, ...@@ -109,21 +109,6 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
return err; return err;
} }
int erofs_map_blocks(struct inode *inode,
struct erofs_map_blocks *map, int flags)
{
if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
int err = z_erofs_map_blocks_iter(inode, map, flags);
if (map->mpage) {
put_page(map->mpage);
map->mpage = NULL;
}
return err;
}
return erofs_map_blocks_flatmode(inode, map, flags);
}
static inline struct bio *erofs_read_raw_page(struct bio *bio, static inline struct bio *erofs_read_raw_page(struct bio *bio,
struct address_space *mapping, struct address_space *mapping,
struct page *page, struct page *page,
...@@ -159,7 +144,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio, ...@@ -159,7 +144,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
erofs_blk_t blknr; erofs_blk_t blknr;
unsigned int blkoff; unsigned int blkoff;
err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); err = erofs_map_blocks_flatmode(inode, &map, EROFS_GET_BLOCKS_RAW);
if (err) if (err)
goto err_out; goto err_out;
...@@ -318,7 +303,7 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block) ...@@ -318,7 +303,7 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
return 0; return 0;
} }
if (!erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW)) if (!erofs_map_blocks_flatmode(inode, &map, EROFS_GET_BLOCKS_RAW))
return erofs_blknr(map.m_pa); return erofs_blknr(map.m_pa);
return 0; return 0;
......
...@@ -28,6 +28,42 @@ struct z_erofs_decompressor { ...@@ -28,6 +28,42 @@ struct z_erofs_decompressor {
char *name; char *name;
}; };
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int size)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
u16 distance;
if (lz4) {
if (size < sizeof(struct z_erofs_lz4_cfgs)) {
erofs_err(sb, "invalid lz4 cfgs, size=%u", size);
return -EINVAL;
}
distance = le16_to_cpu(lz4->max_distance);
sbi->lz4.max_pclusterblks = le16_to_cpu(lz4->max_pclusterblks);
if (!sbi->lz4.max_pclusterblks) {
sbi->lz4.max_pclusterblks = 1; /* reserved case */
} else if (sbi->lz4.max_pclusterblks >
Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
erofs_err(sb, "too large lz4 pclusterblks %u",
sbi->lz4.max_pclusterblks);
return -EINVAL;
} else if (sbi->lz4.max_pclusterblks >= 2) {
erofs_info(sb, "EXPERIMENTAL big pcluster feature in use. Use at your own risk!");
}
} else {
distance = le16_to_cpu(dsb->u1.lz4_max_distance);
sbi->lz4.max_pclusterblks = 1;
}
sbi->lz4.max_distance_pages = distance ?
DIV_ROUND_UP(distance, PAGE_SIZE) + 1 :
LZ4_MAX_DISTANCE_PAGES;
return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks);
}
static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq, static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
struct list_head *pagepool) struct list_head *pagepool)
{ {
...@@ -36,6 +72,8 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq, ...@@ -36,6 +72,8 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL }; struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES, unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
BITS_PER_LONG)] = { 0 }; BITS_PER_LONG)] = { 0 };
unsigned int lz4_max_distance_pages =
EROFS_SB(rq->sb)->lz4.max_distance_pages;
void *kaddr = NULL; void *kaddr = NULL;
unsigned int i, j, top; unsigned int i, j, top;
...@@ -44,14 +82,14 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq, ...@@ -44,14 +82,14 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
struct page *const page = rq->out[i]; struct page *const page = rq->out[i];
struct page *victim; struct page *victim;
if (j >= LZ4_MAX_DISTANCE_PAGES) if (j >= lz4_max_distance_pages)
j = 0; j = 0;
/* 'valid' bounced can only be tested after a complete round */ /* 'valid' bounced can only be tested after a complete round */
if (test_bit(j, bounced)) { if (test_bit(j, bounced)) {
DBG_BUGON(i < LZ4_MAX_DISTANCE_PAGES); DBG_BUGON(i < lz4_max_distance_pages);
DBG_BUGON(top >= LZ4_MAX_DISTANCE_PAGES); DBG_BUGON(top >= lz4_max_distance_pages);
availables[top++] = rq->out[i - LZ4_MAX_DISTANCE_PAGES]; availables[top++] = rq->out[i - lz4_max_distance_pages];
} }
if (page) { if (page) {
...@@ -73,9 +111,8 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq, ...@@ -73,9 +111,8 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
victim = availables[--top]; victim = availables[--top];
get_page(victim); get_page(victim);
} else { } else {
victim = erofs_allocpage(pagepool, GFP_KERNEL); victim = erofs_allocpage(pagepool,
if (!victim) GFP_KERNEL | __GFP_NOFAIL);
return -ENOMEM;
set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE); set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
} }
rq->out[i] = victim; rq->out[i] = victim;
...@@ -83,96 +120,123 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq, ...@@ -83,96 +120,123 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
return kaddr ? 1 : 0; return kaddr ? 1 : 0;
} }
static void *generic_copy_inplace_data(struct z_erofs_decompress_req *rq, static void *z_erofs_handle_inplace_io(struct z_erofs_decompress_req *rq,
u8 *src, unsigned int pageofs_in) void *inpage, unsigned int *inputmargin, int *maptype,
bool support_0padding)
{ {
/* unsigned int nrpages_in, nrpages_out;
* if in-place decompression is ongoing, those decompressed unsigned int ofull, oend, inputsize, total, i, j;
* pages should be copied in order to avoid being overlapped. struct page **in;
*/ void *src, *tmp;
struct page **in = rq->in;
u8 *const tmp = erofs_get_pcpubuf(0);
u8 *tmpp = tmp;
unsigned int inlen = rq->inputsize - pageofs_in;
unsigned int count = min_t(uint, inlen, PAGE_SIZE - pageofs_in);
while (tmpp < tmp + inlen) { inputsize = rq->inputsize;
nrpages_in = PAGE_ALIGN(inputsize) >> PAGE_SHIFT;
oend = rq->pageofs_out + rq->outputsize;
ofull = PAGE_ALIGN(oend);
nrpages_out = ofull >> PAGE_SHIFT;
if (rq->inplace_io) {
if (rq->partial_decoding || !support_0padding ||
ofull - oend < LZ4_DECOMPRESS_INPLACE_MARGIN(inputsize))
goto docopy;
for (i = 0; i < nrpages_in; ++i) {
DBG_BUGON(rq->in[i] == NULL);
for (j = 0; j < nrpages_out - nrpages_in + i; ++j)
if (rq->out[j] == rq->in[i])
goto docopy;
}
}
if (nrpages_in <= 1) {
*maptype = 0;
return inpage;
}
kunmap_atomic(inpage);
might_sleep();
src = erofs_vm_map_ram(rq->in, nrpages_in);
if (!src) if (!src)
src = kmap_atomic(*in); return ERR_PTR(-ENOMEM);
memcpy(tmpp, src + pageofs_in, count); *maptype = 1;
kunmap_atomic(src); return src;
src = NULL;
tmpp += count; docopy:
pageofs_in = 0; /* Or copy compressed data which can be overlapped to per-CPU buffer */
count = PAGE_SIZE; in = rq->in;
src = erofs_get_pcpubuf(nrpages_in);
if (!src) {
DBG_BUGON(1);
kunmap_atomic(inpage);
return ERR_PTR(-EFAULT);
}
tmp = src;
total = rq->inputsize;
while (total) {
unsigned int page_copycnt =
min_t(unsigned int, total, PAGE_SIZE - *inputmargin);
if (!inpage)
inpage = kmap_atomic(*in);
memcpy(tmp, inpage + *inputmargin, page_copycnt);
kunmap_atomic(inpage);
inpage = NULL;
tmp += page_copycnt;
total -= page_copycnt;
++in; ++in;
*inputmargin = 0;
} }
return tmp; *maptype = 2;
return src;
} }
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out) static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
{ {
unsigned int inputmargin, inlen; unsigned int inputmargin;
u8 *src; u8 *headpage, *src;
bool copied, support_0padding; bool support_0padding;
int ret; int ret, maptype;
if (rq->inputsize > PAGE_SIZE) DBG_BUGON(*rq->in == NULL);
return -EOPNOTSUPP; headpage = kmap_atomic(*rq->in);
src = kmap_atomic(*rq->in);
inputmargin = 0; inputmargin = 0;
support_0padding = false; support_0padding = false;
/* decompression inplace is only safe when 0padding is enabled */ /* decompression inplace is only safe when 0padding is enabled */
if (EROFS_SB(rq->sb)->feature_incompat & if (erofs_sb_has_lz4_0padding(EROFS_SB(rq->sb))) {
EROFS_FEATURE_INCOMPAT_LZ4_0PADDING) {
support_0padding = true; support_0padding = true;
while (!src[inputmargin & ~PAGE_MASK]) while (!headpage[inputmargin & ~PAGE_MASK])
if (!(++inputmargin & ~PAGE_MASK)) if (!(++inputmargin & ~PAGE_MASK))
break; break;
if (inputmargin >= rq->inputsize) { if (inputmargin >= rq->inputsize) {
kunmap_atomic(src); kunmap_atomic(headpage);
return -EIO; return -EIO;
} }
} }
copied = false; rq->inputsize -= inputmargin;
inlen = rq->inputsize - inputmargin; src = z_erofs_handle_inplace_io(rq, headpage, &inputmargin, &maptype,
if (rq->inplace_io) { support_0padding);
const uint oend = (rq->pageofs_out + if (IS_ERR(src))
rq->outputsize) & ~PAGE_MASK; return PTR_ERR(src);
const uint nr = PAGE_ALIGN(rq->pageofs_out +
rq->outputsize) >> PAGE_SHIFT;
if (rq->partial_decoding || !support_0padding ||
rq->out[nr - 1] != rq->in[0] ||
rq->inputsize - oend <
LZ4_DECOMPRESS_INPLACE_MARGIN(inlen)) {
src = generic_copy_inplace_data(rq, src, inputmargin);
inputmargin = 0;
copied = true;
}
}
/* legacy format could compress extra data in a pcluster. */ /* legacy format could compress extra data in a pcluster. */
if (rq->partial_decoding || !support_0padding) if (rq->partial_decoding || !support_0padding)
ret = LZ4_decompress_safe_partial(src + inputmargin, out, ret = LZ4_decompress_safe_partial(src + inputmargin, out,
inlen, rq->outputsize, rq->inputsize, rq->outputsize, rq->outputsize);
rq->outputsize);
else else
ret = LZ4_decompress_safe(src + inputmargin, out, ret = LZ4_decompress_safe(src + inputmargin, out,
inlen, rq->outputsize); rq->inputsize, rq->outputsize);
if (ret != rq->outputsize) { if (ret != rq->outputsize) {
erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]", erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
ret, inlen, inputmargin, rq->outputsize); ret, rq->inputsize, inputmargin, rq->outputsize);
WARN_ON(1); WARN_ON(1);
print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET, print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
16, 1, src + inputmargin, inlen, true); 16, 1, src + inputmargin, rq->inputsize, true);
print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET, print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
16, 1, out, rq->outputsize, true); 16, 1, out, rq->outputsize, true);
...@@ -181,10 +245,16 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out) ...@@ -181,10 +245,16 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
ret = -EIO; ret = -EIO;
} }
if (copied) if (maptype == 0) {
erofs_put_pcpubuf(src);
else
kunmap_atomic(src); kunmap_atomic(src);
} else if (maptype == 1) {
vm_unmap_ram(src, PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT);
} else if (maptype == 2) {
erofs_put_pcpubuf(src);
} else {
DBG_BUGON(1);
return -EFAULT;
}
return ret; return ret;
} }
...@@ -234,8 +304,10 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq, ...@@ -234,8 +304,10 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
const struct z_erofs_decompressor *alg = decompressors + rq->alg; const struct z_erofs_decompressor *alg = decompressors + rq->alg;
unsigned int dst_maptype; unsigned int dst_maptype;
void *dst; void *dst;
int ret, i; int ret;
/* two optimized fast paths only for non bigpcluster cases yet */
if (rq->inputsize <= PAGE_SIZE) {
if (nrpages_out == 1 && !rq->inplace_io) { if (nrpages_out == 1 && !rq->inplace_io) {
DBG_BUGON(!*rq->out); DBG_BUGON(!*rq->out);
dst = kmap_atomic(*rq->out); dst = kmap_atomic(*rq->out);
...@@ -249,7 +321,7 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq, ...@@ -249,7 +321,7 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
* compressed data is preferred. * compressed data is preferred.
*/ */
if (rq->outputsize <= PAGE_SIZE * 7 / 8) { if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
dst = erofs_get_pcpubuf(0); dst = erofs_get_pcpubuf(1);
if (IS_ERR(dst)) if (IS_ERR(dst))
return PTR_ERR(dst); return PTR_ERR(dst);
...@@ -262,29 +334,21 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq, ...@@ -262,29 +334,21 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
erofs_put_pcpubuf(dst); erofs_put_pcpubuf(dst);
return ret; return ret;
} }
}
/* general decoding path which can be used for all cases */
ret = alg->prepare_destpages(rq, pagepool); ret = alg->prepare_destpages(rq, pagepool);
if (ret < 0) { if (ret < 0)
return ret; return ret;
} else if (ret) { if (ret) {
dst = page_address(*rq->out); dst = page_address(*rq->out);
dst_maptype = 1; dst_maptype = 1;
goto dstmap_out; goto dstmap_out;
} }
i = 0; dst = erofs_vm_map_ram(rq->out, nrpages_out);
while (1) {
dst = vm_map_ram(rq->out, nrpages_out, -1);
/* retry two more times (totally 3 times) */
if (dst || ++i >= 3)
break;
vm_unmap_aliases();
}
if (!dst) if (!dst)
return -ENOMEM; return -ENOMEM;
dst_maptype = 2; dst_maptype = 2;
dstmap_out: dstmap_out:
......
...@@ -18,15 +18,22 @@ ...@@ -18,15 +18,22 @@
* be incompatible with this kernel version. * be incompatible with this kernel version.
*/ */
#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001 #define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001
#define EROFS_ALL_FEATURE_INCOMPAT EROFS_FEATURE_INCOMPAT_LZ4_0PADDING #define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
#define EROFS_ALL_FEATURE_INCOMPAT \
(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER)
/* 128-byte erofs on-disk super block */ #define EROFS_SB_EXTSLOT_SIZE 16
/* erofs on-disk super block (currently 128 bytes) */
struct erofs_super_block { struct erofs_super_block {
__le32 magic; /* file system magic number */ __le32 magic; /* file system magic number */
__le32 checksum; /* crc32c(super_block) */ __le32 checksum; /* crc32c(super_block) */
__le32 feature_compat; __le32 feature_compat;
__u8 blkszbits; /* support block_size == PAGE_SIZE only */ __u8 blkszbits; /* support block_size == PAGE_SIZE only */
__u8 reserved; __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */
__le16 root_nid; /* nid of root directory */ __le16 root_nid; /* nid of root directory */
__le64 inos; /* total valid ino # (== f_files - f_favail) */ __le64 inos; /* total valid ino # (== f_files - f_favail) */
...@@ -39,7 +46,13 @@ struct erofs_super_block { ...@@ -39,7 +46,13 @@ struct erofs_super_block {
__u8 uuid[16]; /* 128-bit uuid for volume */ __u8 uuid[16]; /* 128-bit uuid for volume */
__u8 volume_name[16]; /* volume name */ __u8 volume_name[16]; /* volume name */
__le32 feature_incompat; __le32 feature_incompat;
__u8 reserved2[44]; union {
/* bitmap for available compression algorithms */
__le16 available_compr_algs;
/* customized sliding window size instead of 64k by default */
__le16 lz4_max_distance;
} __packed u1;
__u8 reserved2[42];
}; };
/* /*
...@@ -75,6 +88,9 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode) ...@@ -75,6 +88,9 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
#define EROFS_I_VERSION_BIT 0 #define EROFS_I_VERSION_BIT 0
#define EROFS_I_DATALAYOUT_BIT 1 #define EROFS_I_DATALAYOUT_BIT 1
#define EROFS_I_ALL \
((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1)
/* 32-byte reduced form of an ondisk inode */ /* 32-byte reduced form of an ondisk inode */
struct erofs_inode_compact { struct erofs_inode_compact {
__le16 i_format; /* inode format hints */ __le16 i_format; /* inode format hints */
...@@ -189,20 +205,33 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e) ...@@ -189,20 +205,33 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
e->e_name_len + le16_to_cpu(e->e_value_size)); e->e_name_len + le16_to_cpu(e->e_value_size));
} }
/* maximum supported size of a physical compression cluster */
#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024)
/* available compression algorithm types (for h_algorithmtype) */ /* available compression algorithm types (for h_algorithmtype) */
enum { enum {
Z_EROFS_COMPRESSION_LZ4 = 0, Z_EROFS_COMPRESSION_LZ4 = 0,
Z_EROFS_COMPRESSION_MAX Z_EROFS_COMPRESSION_MAX
}; };
#define Z_EROFS_ALL_COMPR_ALGS (1 << (Z_EROFS_COMPRESSION_MAX - 1))
/* 14 bytes (+ length field = 16 bytes) */
struct z_erofs_lz4_cfgs {
__le16 max_distance;
__le16 max_pclusterblks;
u8 reserved[10];
} __packed;
/* /*
* bit 0 : COMPACTED_2B indexes (0 - off; 1 - on) * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
* e.g. for 4k logical cluster size, 4B if compacted 2B is off; * e.g. for 4k logical cluster size, 4B if compacted 2B is off;
* (4B) + 2B + (4B) if compacted 2B is on. * (4B) + 2B + (4B) if compacted 2B is on.
* bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
* bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
*/ */
#define Z_EROFS_ADVISE_COMPACTED_2B_BIT 0 #define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
#define Z_EROFS_ADVISE_COMPACTED_2B (1 << Z_EROFS_ADVISE_COMPACTED_2B_BIT) #define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
struct z_erofs_map_header { struct z_erofs_map_header {
__le32 h_reserved1; __le32 h_reserved1;
...@@ -214,9 +243,7 @@ struct z_erofs_map_header { ...@@ -214,9 +243,7 @@ struct z_erofs_map_header {
__u8 h_algorithmtype; __u8 h_algorithmtype;
/* /*
* bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
* bit 3-4 : (physical - logical) cluster bits of head 1: * bit 3-7 : reserved.
* For example, if logical clustersize = 4096, 1 for 8192.
* bit 5-7 : (physical - logical) cluster bits of head 2.
*/ */
__u8 h_clusterbits; __u8 h_clusterbits;
}; };
...@@ -259,6 +286,13 @@ enum { ...@@ -259,6 +286,13 @@ enum {
#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2
#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0
/*
* D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
* compressed block count of a compressed extent (in logical clusters, aka.
* block count of a pcluster).
*/
#define Z_EROFS_VLE_DI_D0_CBLKCNT (1 << 11)
struct z_erofs_vle_decompressed_index { struct z_erofs_vle_decompressed_index {
__le16 di_advise; __le16 di_advise;
/* where to decompress in the head cluster */ /* where to decompress in the head cluster */
......
...@@ -44,6 +44,13 @@ static struct page *erofs_read_inode(struct inode *inode, ...@@ -44,6 +44,13 @@ static struct page *erofs_read_inode(struct inode *inode,
dic = page_address(page) + *ofs; dic = page_address(page) + *ofs;
ifmt = le16_to_cpu(dic->i_format); ifmt = le16_to_cpu(dic->i_format);
if (ifmt & ~EROFS_I_ALL) {
erofs_err(inode->i_sb, "unsupported i_format %u of nid %llu",
ifmt, vi->nid);
err = -EOPNOTSUPP;
goto err_out;
}
vi->datalayout = erofs_inode_datalayout(ifmt); vi->datalayout = erofs_inode_datalayout(ifmt);
if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) { if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) {
erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu", erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu",
......
...@@ -50,6 +50,8 @@ struct erofs_fs_context { ...@@ -50,6 +50,8 @@ struct erofs_fs_context {
#ifdef CONFIG_EROFS_FS_ZIP #ifdef CONFIG_EROFS_FS_ZIP
/* current strategy of how to use managed cache */ /* current strategy of how to use managed cache */
unsigned char cache_strategy; unsigned char cache_strategy;
/* strategy of sync decompression (false - auto, true - force on) */
bool readahead_sync_decompress;
/* threshold for decompression synchronously */ /* threshold for decompression synchronously */
unsigned int max_sync_decompress_pages; unsigned int max_sync_decompress_pages;
...@@ -57,6 +59,14 @@ struct erofs_fs_context { ...@@ -57,6 +59,14 @@ struct erofs_fs_context {
unsigned int mount_opt; unsigned int mount_opt;
}; };
/* all filesystem-wide lz4 configurations */
struct erofs_sb_lz4_info {
/* # of pages needed for EROFS lz4 rolling decompression */
u16 max_distance_pages;
/* maximum possible blocks for pclusters in the filesystem */
u16 max_pclusterblks;
};
struct erofs_sb_info { struct erofs_sb_info {
#ifdef CONFIG_EROFS_FS_ZIP #ifdef CONFIG_EROFS_FS_ZIP
/* list for all registered superblocks, mainly for shrinker */ /* list for all registered superblocks, mainly for shrinker */
...@@ -67,9 +77,12 @@ struct erofs_sb_info { ...@@ -67,9 +77,12 @@ struct erofs_sb_info {
struct xarray managed_pslots; struct xarray managed_pslots;
unsigned int shrinker_run_no; unsigned int shrinker_run_no;
u16 available_compr_algs;
/* pseudo inode to manage cached pages */ /* pseudo inode to manage cached pages */
struct inode *managed_cache; struct inode *managed_cache;
struct erofs_sb_lz4_info lz4;
#endif /* CONFIG_EROFS_FS_ZIP */ #endif /* CONFIG_EROFS_FS_ZIP */
u32 blocks; u32 blocks;
u32 meta_blkaddr; u32 meta_blkaddr;
...@@ -80,6 +93,7 @@ struct erofs_sb_info { ...@@ -80,6 +93,7 @@ struct erofs_sb_info {
/* inode slot unit size in bit shift */ /* inode slot unit size in bit shift */
unsigned char islotbits; unsigned char islotbits;
u32 sb_size; /* total superblock size */
u32 build_time_nsec; u32 build_time_nsec;
u64 build_time; u64 build_time;
...@@ -182,12 +196,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) ...@@ -182,12 +196,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
return v; return v;
} }
#endif /* !CONFIG_SMP */ #endif /* !CONFIG_SMP */
/* hard limit of pages per compressed cluster */
#define Z_EROFS_CLUSTER_MAX_PAGES (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT)
#define EROFS_PCPUBUF_NR_PAGES Z_EROFS_CLUSTER_MAX_PAGES
#else
#define EROFS_PCPUBUF_NR_PAGES 0
#endif /* !CONFIG_EROFS_FS_ZIP */ #endif /* !CONFIG_EROFS_FS_ZIP */
/* we strictly follow PAGE_SIZE and no buffer head yet */ /* we strictly follow PAGE_SIZE and no buffer head yet */
...@@ -216,6 +224,17 @@ static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid) ...@@ -216,6 +224,17 @@ static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid)
return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits); return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits);
} }
#define EROFS_FEATURE_FUNCS(name, compat, feature) \
static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \
{ \
return sbi->feature_##compat & EROFS_FEATURE_##feature; \
}
EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
/* atomic flag definitions */ /* atomic flag definitions */
#define EROFS_I_EA_INITED_BIT 0 #define EROFS_I_EA_INITED_BIT 0
#define EROFS_I_Z_INITED_BIT 1 #define EROFS_I_Z_INITED_BIT 1
...@@ -244,7 +263,6 @@ struct erofs_inode { ...@@ -244,7 +263,6 @@ struct erofs_inode {
unsigned short z_advise; unsigned short z_advise;
unsigned char z_algorithmtype[2]; unsigned char z_algorithmtype[2];
unsigned char z_logical_clusterbits; unsigned char z_logical_clusterbits;
unsigned char z_physical_clusterbits[2];
}; };
#endif /* CONFIG_EROFS_FS_ZIP */ #endif /* CONFIG_EROFS_FS_ZIP */
}; };
...@@ -287,7 +305,7 @@ extern const struct address_space_operations erofs_raw_access_aops; ...@@ -287,7 +305,7 @@ extern const struct address_space_operations erofs_raw_access_aops;
extern const struct address_space_operations z_erofs_aops; extern const struct address_space_operations z_erofs_aops;
/* /*
* Logical to physical block mapping, used by erofs_map_blocks() * Logical to physical block mapping
* *
* Different with other file systems, it is used for 2 access modes: * Different with other file systems, it is used for 2 access modes:
* *
...@@ -334,7 +352,7 @@ struct erofs_map_blocks { ...@@ -334,7 +352,7 @@ struct erofs_map_blocks {
struct page *mpage; struct page *mpage;
}; };
/* Flags used by erofs_map_blocks() */ /* Flags used by erofs_map_blocks_flatmode() */
#define EROFS_GET_BLOCKS_RAW 0x0001 #define EROFS_GET_BLOCKS_RAW 0x0001
/* zmap.c */ /* zmap.c */
...@@ -356,8 +374,6 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode, ...@@ -356,8 +374,6 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode,
/* data.c */ /* data.c */
struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr); struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr);
int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int);
/* inode.c */ /* inode.c */
static inline unsigned long erofs_inode_hash(erofs_nid_t nid) static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
{ {
...@@ -386,23 +402,30 @@ int erofs_namei(struct inode *dir, struct qstr *name, ...@@ -386,23 +402,30 @@ int erofs_namei(struct inode *dir, struct qstr *name,
/* dir.c */ /* dir.c */
extern const struct file_operations erofs_dir_fops; extern const struct file_operations erofs_dir_fops;
/* utils.c / zdata.c */ static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count)
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);
#if (EROFS_PCPUBUF_NR_PAGES > 0)
void *erofs_get_pcpubuf(unsigned int pagenr);
#define erofs_put_pcpubuf(buf) do { \
(void)&(buf); \
preempt_enable(); \
} while (0)
#else
static inline void *erofs_get_pcpubuf(unsigned int pagenr)
{ {
return ERR_PTR(-EOPNOTSUPP); int retried = 0;
while (1) {
void *p = vm_map_ram(pages, count, -1);
/* retry two more times (totally 3 times) */
if (p || ++retried >= 3)
return p;
vm_unmap_aliases();
}
return NULL;
} }
#define erofs_put_pcpubuf(buf) do {} while (0) /* pcpubuf.c */
#endif void *erofs_get_pcpubuf(unsigned int requiredpages);
void erofs_put_pcpubuf(void *ptr);
int erofs_pcpubuf_growsize(unsigned int nrpages);
void erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void);
/* utils.c / zdata.c */
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);
#ifdef CONFIG_EROFS_FS_ZIP #ifdef CONFIG_EROFS_FS_ZIP
int erofs_workgroup_put(struct erofs_workgroup *grp); int erofs_workgroup_put(struct erofs_workgroup *grp);
...@@ -421,6 +444,9 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, ...@@ -421,6 +444,9 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
struct erofs_workgroup *egrp); struct erofs_workgroup *egrp);
int erofs_try_to_free_cached_page(struct address_space *mapping, int erofs_try_to_free_cached_page(struct address_space *mapping,
struct page *page); struct page *page);
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int len);
#else #else
static inline void erofs_shrinker_register(struct super_block *sb) {} static inline void erofs_shrinker_register(struct super_block *sb) {}
static inline void erofs_shrinker_unregister(struct super_block *sb) {} static inline void erofs_shrinker_unregister(struct super_block *sb) {}
...@@ -428,6 +454,16 @@ static inline int erofs_init_shrinker(void) { return 0; } ...@@ -428,6 +454,16 @@ static inline int erofs_init_shrinker(void) { return 0; }
static inline void erofs_exit_shrinker(void) {} static inline void erofs_exit_shrinker(void) {}
static inline int z_erofs_init_zip_subsystem(void) { return 0; } static inline int z_erofs_init_zip_subsystem(void) { return 0; }
static inline void z_erofs_exit_zip_subsystem(void) {} static inline void z_erofs_exit_zip_subsystem(void) {}
static inline int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int len)
{
if (lz4 || dsb->u1.lz4_max_distance) {
erofs_err(sb, "lz4 algorithm isn't enabled");
return -EINVAL;
}
return 0;
}
#endif /* !CONFIG_EROFS_FS_ZIP */ #endif /* !CONFIG_EROFS_FS_ZIP */
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) Gao Xiang <xiang@kernel.org>
*
* For low-latency decompression algorithms (e.g. lz4), reserve consecutive
* per-CPU virtual memory (in pages) in advance to store such inplace I/O
* data if inplace decompression is failed (due to unmet inplace margin for
* example).
*/
#include "internal.h"
struct erofs_pcpubuf {
raw_spinlock_t lock;
void *ptr;
struct page **pages;
unsigned int nrpages;
};
static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb);
void *erofs_get_pcpubuf(unsigned int requiredpages)
__acquires(pcb->lock)
{
struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb);
raw_spin_lock(&pcb->lock);
/* check if the per-CPU buffer is too small */
if (requiredpages > pcb->nrpages) {
raw_spin_unlock(&pcb->lock);
put_cpu_var(erofs_pcb);
/* (for sparse checker) pretend pcb->lock is still taken */
__acquire(pcb->lock);
return NULL;
}
return pcb->ptr;
}
void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock)
{
struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id());
DBG_BUGON(pcb->ptr != ptr);
raw_spin_unlock(&pcb->lock);
put_cpu_var(erofs_pcb);
}
/* the next step: support per-CPU page buffers hotplug */
int erofs_pcpubuf_growsize(unsigned int nrpages)
{
static DEFINE_MUTEX(pcb_resize_mutex);
static unsigned int pcb_nrpages;
LIST_HEAD(pagepool);
int delta, cpu, ret, i;
mutex_lock(&pcb_resize_mutex);
delta = nrpages - pcb_nrpages;
ret = 0;
/* avoid shrinking pcpubuf, since no idea how many fses rely on */
if (delta <= 0)
goto out;
for_each_possible_cpu(cpu) {
struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
struct page **pages, **oldpages;
void *ptr, *old_ptr;
pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL);
if (!pages) {
ret = -ENOMEM;
break;
}
for (i = 0; i < nrpages; ++i) {
pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL);
if (!pages[i]) {
ret = -ENOMEM;
oldpages = pages;
goto free_pagearray;
}
}
ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL);
if (!ptr) {
ret = -ENOMEM;
oldpages = pages;
goto free_pagearray;
}
raw_spin_lock(&pcb->lock);
old_ptr = pcb->ptr;
pcb->ptr = ptr;
oldpages = pcb->pages;
pcb->pages = pages;
i = pcb->nrpages;
pcb->nrpages = nrpages;
raw_spin_unlock(&pcb->lock);
if (!oldpages) {
DBG_BUGON(old_ptr);
continue;
}
if (old_ptr)
vunmap(old_ptr);
free_pagearray:
while (i)
list_add(&oldpages[--i]->lru, &pagepool);
kfree(oldpages);
if (ret)
break;
}
pcb_nrpages = nrpages;
put_pages_list(&pagepool);
out:
mutex_unlock(&pcb_resize_mutex);
return ret;
}
void erofs_pcpubuf_init(void)
{
int cpu;
for_each_possible_cpu(cpu) {
struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
raw_spin_lock_init(&pcb->lock);
}
}
void erofs_pcpubuf_exit(void)
{
int cpu, i;
for_each_possible_cpu(cpu) {
struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
if (pcb->ptr) {
vunmap(pcb->ptr);
pcb->ptr = NULL;
}
if (!pcb->pages)
continue;
for (i = 0; i < pcb->nrpages; ++i)
if (pcb->pages[i])
put_page(pcb->pages[i]);
kfree(pcb->pages);
pcb->pages = NULL;
}
}
...@@ -122,6 +122,136 @@ static bool check_layout_compatibility(struct super_block *sb, ...@@ -122,6 +122,136 @@ static bool check_layout_compatibility(struct super_block *sb,
return true; return true;
} }
#ifdef CONFIG_EROFS_FS_ZIP
/* read variable-sized metadata, offset will be aligned by 4-byte */
static void *erofs_read_metadata(struct super_block *sb, struct page **pagep,
erofs_off_t *offset, int *lengthp)
{
struct page *page = *pagep;
u8 *buffer, *ptr;
int len, i, cnt;
erofs_blk_t blk;
*offset = round_up(*offset, 4);
blk = erofs_blknr(*offset);
if (!page || page->index != blk) {
if (page) {
unlock_page(page);
put_page(page);
}
page = erofs_get_meta_page(sb, blk);
if (IS_ERR(page))
goto err_nullpage;
}
ptr = kmap(page);
len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]);
if (!len)
len = U16_MAX + 1;
buffer = kmalloc(len, GFP_KERNEL);
if (!buffer) {
buffer = ERR_PTR(-ENOMEM);
goto out;
}
*offset += sizeof(__le16);
*lengthp = len;
for (i = 0; i < len; i += cnt) {
cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i);
blk = erofs_blknr(*offset);
if (!page || page->index != blk) {
if (page) {
kunmap(page);
unlock_page(page);
put_page(page);
}
page = erofs_get_meta_page(sb, blk);
if (IS_ERR(page)) {
kfree(buffer);
goto err_nullpage;
}
ptr = kmap(page);
}
memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt);
*offset += cnt;
}
out:
kunmap(page);
*pagep = page;
return buffer;
err_nullpage:
*pagep = NULL;
return page;
}
static int erofs_load_compr_cfgs(struct super_block *sb,
struct erofs_super_block *dsb)
{
struct erofs_sb_info *sbi;
struct page *page;
unsigned int algs, alg;
erofs_off_t offset;
int size, ret;
sbi = EROFS_SB(sb);
sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs);
if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) {
erofs_err(sb, "try to load compressed fs with unsupported algorithms %x",
sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS);
return -EINVAL;
}
offset = EROFS_SUPER_OFFSET + sbi->sb_size;
page = NULL;
alg = 0;
ret = 0;
for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {
void *data;
if (!(algs & 1))
continue;
data = erofs_read_metadata(sb, &page, &offset, &size);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
goto err;
}
switch (alg) {
case Z_EROFS_COMPRESSION_LZ4:
ret = z_erofs_load_lz4_config(sb, dsb, data, size);
break;
default:
DBG_BUGON(1);
ret = -EFAULT;
}
kfree(data);
if (ret)
goto err;
}
err:
if (page) {
unlock_page(page);
put_page(page);
}
return ret;
}
#else
static int erofs_load_compr_cfgs(struct super_block *sb,
struct erofs_super_block *dsb)
{
if (dsb->u1.available_compr_algs) {
erofs_err(sb, "try to load compressed fs when compression is disabled");
return -EINVAL;
}
return 0;
}
#endif
static int erofs_read_superblock(struct super_block *sb) static int erofs_read_superblock(struct super_block *sb)
{ {
struct erofs_sb_info *sbi; struct erofs_sb_info *sbi;
...@@ -149,7 +279,7 @@ static int erofs_read_superblock(struct super_block *sb) ...@@ -149,7 +279,7 @@ static int erofs_read_superblock(struct super_block *sb)
} }
sbi->feature_compat = le32_to_cpu(dsb->feature_compat); sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
if (sbi->feature_compat & EROFS_FEATURE_COMPAT_SB_CHKSUM) { if (erofs_sb_has_sb_chksum(sbi)) {
ret = erofs_superblock_csum_verify(sb, data); ret = erofs_superblock_csum_verify(sb, data);
if (ret) if (ret)
goto out; goto out;
...@@ -166,6 +296,12 @@ static int erofs_read_superblock(struct super_block *sb) ...@@ -166,6 +296,12 @@ static int erofs_read_superblock(struct super_block *sb)
if (!check_layout_compatibility(sb, dsb)) if (!check_layout_compatibility(sb, dsb))
goto out; goto out;
sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE;
if (sbi->sb_size > EROFS_BLKSIZ) {
erofs_err(sb, "invalid sb_extslots %u (more than a fs block)",
sbi->sb_size);
goto out;
}
sbi->blocks = le32_to_cpu(dsb->blocks); sbi->blocks = le32_to_cpu(dsb->blocks);
sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR #ifdef CONFIG_EROFS_FS_XATTR
...@@ -187,7 +323,12 @@ static int erofs_read_superblock(struct super_block *sb) ...@@ -187,7 +323,12 @@ static int erofs_read_superblock(struct super_block *sb)
ret = -EFSCORRUPTED; ret = -EFSCORRUPTED;
goto out; goto out;
} }
ret = 0;
/* parse on-disk compression configurations */
if (erofs_sb_has_compr_cfgs(sbi))
ret = erofs_load_compr_cfgs(sb, dsb);
else
ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0);
out: out:
kunmap(page); kunmap(page);
put_page(page); put_page(page);
...@@ -200,6 +341,7 @@ static void erofs_default_options(struct erofs_fs_context *ctx) ...@@ -200,6 +341,7 @@ static void erofs_default_options(struct erofs_fs_context *ctx)
#ifdef CONFIG_EROFS_FS_ZIP #ifdef CONFIG_EROFS_FS_ZIP
ctx->cache_strategy = EROFS_ZIP_CACHE_READAROUND; ctx->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
ctx->max_sync_decompress_pages = 3; ctx->max_sync_decompress_pages = 3;
ctx->readahead_sync_decompress = false;
#endif #endif
#ifdef CONFIG_EROFS_FS_XATTR #ifdef CONFIG_EROFS_FS_XATTR
set_opt(ctx, XATTR_USER); set_opt(ctx, XATTR_USER);
...@@ -513,6 +655,7 @@ static int __init erofs_module_init(void) ...@@ -513,6 +655,7 @@ static int __init erofs_module_init(void)
if (err) if (err)
goto shrinker_err; goto shrinker_err;
erofs_pcpubuf_init();
err = z_erofs_init_zip_subsystem(); err = z_erofs_init_zip_subsystem();
if (err) if (err)
goto zip_err; goto zip_err;
...@@ -542,6 +685,7 @@ static void __exit erofs_module_exit(void) ...@@ -542,6 +685,7 @@ static void __exit erofs_module_exit(void)
/* Ensure all RCU free inodes are safe before cache is destroyed. */ /* Ensure all RCU free inodes are safe before cache is destroyed. */
rcu_barrier(); rcu_barrier();
kmem_cache_destroy(erofs_inode_cachep); kmem_cache_destroy(erofs_inode_cachep);
erofs_pcpubuf_exit();
} }
/* get filesystem statistics */ /* get filesystem statistics */
......
...@@ -21,18 +21,6 @@ struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp) ...@@ -21,18 +21,6 @@ struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
return page; return page;
} }
#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];
void *erofs_get_pcpubuf(unsigned int pagenr)
{
preempt_disable();
return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif
#ifdef CONFIG_EROFS_FS_ZIP #ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */ /* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt; static atomic_long_t erofs_global_shrink_cnt;
......
This diff is collapsed.
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "internal.h" #include "internal.h"
#include "zpvec.h" #include "zpvec.h"
#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
#define Z_EROFS_NR_INLINE_PAGEVECS 3 #define Z_EROFS_NR_INLINE_PAGEVECS 3
/* /*
...@@ -59,16 +60,17 @@ struct z_erofs_pcluster { ...@@ -59,16 +60,17 @@ struct z_erofs_pcluster {
/* A: point to next chained pcluster or TAILs */ /* A: point to next chained pcluster or TAILs */
z_erofs_next_pcluster_t next; z_erofs_next_pcluster_t next;
/* A: compressed pages (including multi-usage pages) */
struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES];
/* A: lower limit of decompressed length and if full length or not */ /* A: lower limit of decompressed length and if full length or not */
unsigned int length; unsigned int length;
/* I: physical cluster size in pages */
unsigned short pclusterpages;
/* I: compression algorithm format */ /* I: compression algorithm format */
unsigned char algorithmformat; unsigned char algorithmformat;
/* I: bit shift of physical cluster size */
unsigned char clusterbits; /* A: compressed pages (can be cached or inplaced pages) */
struct page *compressed_pages[];
}; };
#define z_erofs_primarycollection(pcluster) (&(pcluster)->primary_collection) #define z_erofs_primarycollection(pcluster) (&(pcluster)->primary_collection)
...@@ -82,8 +84,6 @@ struct z_erofs_pcluster { ...@@ -82,8 +84,6 @@ struct z_erofs_pcluster {
#define Z_EROFS_PCLUSTER_NIL (NULL) #define Z_EROFS_PCLUSTER_NIL (NULL)
#define Z_EROFS_WORKGROUP_SIZE sizeof(struct z_erofs_pcluster)
struct z_erofs_decompressqueue { struct z_erofs_decompressqueue {
struct super_block *sb; struct super_block *sb;
atomic_t pending_bios; atomic_t pending_bios;
......
...@@ -11,17 +11,16 @@ ...@@ -11,17 +11,16 @@
int z_erofs_fill_inode(struct inode *inode) int z_erofs_fill_inode(struct inode *inode)
{ {
struct erofs_inode *const vi = EROFS_I(inode); struct erofs_inode *const vi = EROFS_I(inode);
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { if (!erofs_sb_has_big_pcluster(sbi) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
vi->z_advise = 0; vi->z_advise = 0;
vi->z_algorithmtype[0] = 0; vi->z_algorithmtype[0] = 0;
vi->z_algorithmtype[1] = 0; vi->z_algorithmtype[1] = 0;
vi->z_logical_clusterbits = LOG_BLOCK_SIZE; vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits;
vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits;
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
} }
inode->i_mapping->a_ops = &z_erofs_aops; inode->i_mapping->a_ops = &z_erofs_aops;
return 0; return 0;
} }
...@@ -52,7 +51,8 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) ...@@ -52,7 +51,8 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
goto out_unlock; goto out_unlock;
DBG_BUGON(vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY); DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
vi->xattr_isize, 8); vi->xattr_isize, 8);
...@@ -77,18 +77,22 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) ...@@ -77,18 +77,22 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
} }
vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7); vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits + if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
((h->h_clusterbits >> 3) & 3); vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
if (vi->z_physical_clusterbits[0] != LOG_BLOCK_SIZE) { erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
erofs_err(sb, "unsupported physical clusterbits %u for nid %llu, please upgrade kernel", vi->nid);
vi->z_physical_clusterbits[0], vi->nid); err = -EFSCORRUPTED;
err = -EOPNOTSUPP; goto unmap_done;
}
if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
vi->nid);
err = -EFSCORRUPTED;
goto unmap_done; goto unmap_done;
} }
vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits +
((h->h_clusterbits >> 5) & 7);
/* paired with smp_mb() at the beginning of the function */ /* paired with smp_mb() at the beginning of the function */
smp_mb(); smp_mb();
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
...@@ -111,7 +115,7 @@ struct z_erofs_maprecorder { ...@@ -111,7 +115,7 @@ struct z_erofs_maprecorder {
u8 type; u8 type;
u16 clusterofs; u16 clusterofs;
u16 delta[2]; u16 delta[2];
erofs_blk_t pblk; erofs_blk_t pblk, compressedlcs;
}; };
static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
...@@ -174,6 +178,15 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, ...@@ -174,6 +178,15 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
m->clusterofs = 1 << vi->z_logical_clusterbits; m->clusterofs = 1 << vi->z_logical_clusterbits;
m->delta[0] = le16_to_cpu(di->di_u.delta[0]); m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
m->compressedlcs = m->delta[0] &
~Z_EROFS_VLE_DI_D0_CBLKCNT;
m->delta[0] = 1;
}
m->delta[1] = le16_to_cpu(di->di_u.delta[1]); m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
break; break;
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
...@@ -210,6 +223,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, ...@@ -210,6 +223,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
unsigned int vcnt, base, lo, encodebits, nblk; unsigned int vcnt, base, lo, encodebits, nblk;
int i; int i;
u8 *in, type; u8 *in, type;
bool big_pcluster;
if (1 << amortizedshift == 4) if (1 << amortizedshift == 4)
vcnt = 2; vcnt = 2;
...@@ -218,6 +232,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, ...@@ -218,6 +232,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
else else
return -EOPNOTSUPP; return -EOPNOTSUPP;
big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
base = round_down(eofs, vcnt << amortizedshift); base = round_down(eofs, vcnt << amortizedshift);
in = m->kaddr + base; in = m->kaddr + base;
...@@ -229,7 +244,15 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, ...@@ -229,7 +244,15 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
m->type = type; m->type = type;
if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
m->clusterofs = 1 << lclusterbits; m->clusterofs = 1 << lclusterbits;
if (i + 1 != vcnt) { if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
if (!big_pcluster) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
m->delta[0] = 1;
return 0;
} else if (i + 1 != (int)vcnt) {
m->delta[0] = lo; m->delta[0] = lo;
return 0; return 0;
} }
...@@ -242,12 +265,15 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, ...@@ -242,12 +265,15 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
in, encodebits * (i - 1), &type); in, encodebits * (i - 1), &type);
if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
lo = 0; lo = 0;
else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT)
lo = 1;
m->delta[0] = lo + 1; m->delta[0] = lo + 1;
return 0; return 0;
} }
m->clusterofs = lo; m->clusterofs = lo;
m->delta[0] = 0; m->delta[0] = 0;
/* figout out blkaddr (pblk) for HEAD lclusters */ /* figout out blkaddr (pblk) for HEAD lclusters */
if (!big_pcluster) {
nblk = 1; nblk = 1;
while (i > 0) { while (i > 0) {
--i; --i;
...@@ -259,6 +285,29 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, ...@@ -259,6 +285,29 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
if (i >= 0) if (i >= 0)
++nblk; ++nblk;
} }
} else {
nblk = 0;
while (i > 0) {
--i;
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
--i;
nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
continue;
}
/* bigpcluster shouldn't have plain d0 == 1 */
if (lo <= 1) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
i -= lo - 2;
continue;
}
++nblk;
}
}
in += (vcnt << amortizedshift) - sizeof(__le32); in += (vcnt << amortizedshift) - sizeof(__le32);
m->pblk = le32_to_cpu(*(__le32 *)in) + nblk; m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
return 0; return 0;
...@@ -381,6 +430,58 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, ...@@ -381,6 +430,58 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
return 0; return 0;
} }
static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
unsigned int initial_lcn)
{
struct erofs_inode *const vi = EROFS_I(m->inode);
struct erofs_map_blocks *const map = m->map;
const unsigned int lclusterbits = vi->z_logical_clusterbits;
unsigned long lcn;
int err;
DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN &&
m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD);
if (!(map->m_flags & EROFS_MAP_ZIPPED) ||
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
map->m_plen = 1 << lclusterbits;
return 0;
}
lcn = m->lcn + 1;
if (m->compressedlcs)
goto out;
if (lcn == initial_lcn)
goto err_bonus_cblkcnt;
err = z_erofs_load_cluster_from_disk(m, lcn);
if (err)
return err;
switch (m->type) {
case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
if (m->delta[0] != 1)
goto err_bonus_cblkcnt;
if (m->compressedlcs)
break;
fallthrough;
default:
erofs_err(m->inode->i_sb,
"cannot found CBLKCNT @ lcn %lu of nid %llu",
lcn, vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
out:
map->m_plen = m->compressedlcs << lclusterbits;
return 0;
err_bonus_cblkcnt:
erofs_err(m->inode->i_sb,
"bogus CBLKCNT @ lcn %lu of nid %llu",
lcn, vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
int z_erofs_map_blocks_iter(struct inode *inode, int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_blocks *map, struct erofs_map_blocks *map,
int flags) int flags)
...@@ -392,6 +493,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, ...@@ -392,6 +493,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
}; };
int err = 0; int err = 0;
unsigned int lclusterbits, endoff; unsigned int lclusterbits, endoff;
unsigned long initial_lcn;
unsigned long long ofs, end; unsigned long long ofs, end;
trace_z_erofs_map_blocks_iter_enter(inode, map, flags); trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
...@@ -410,10 +512,10 @@ int z_erofs_map_blocks_iter(struct inode *inode, ...@@ -410,10 +512,10 @@ int z_erofs_map_blocks_iter(struct inode *inode,
lclusterbits = vi->z_logical_clusterbits; lclusterbits = vi->z_logical_clusterbits;
ofs = map->m_la; ofs = map->m_la;
m.lcn = ofs >> lclusterbits; initial_lcn = ofs >> lclusterbits;
endoff = ofs & ((1 << lclusterbits) - 1); endoff = ofs & ((1 << lclusterbits) - 1);
err = z_erofs_load_cluster_from_disk(&m, m.lcn); err = z_erofs_load_cluster_from_disk(&m, initial_lcn);
if (err) if (err)
goto unmap_out; goto unmap_out;
...@@ -443,7 +545,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, ...@@ -443,7 +545,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
m.delta[0] = 1; m.delta[0] = 1;
fallthrough; fallthrough;
case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
/* get the correspoinding first chunk */ /* get the corresponding first chunk */
err = z_erofs_extent_lookback(&m, m.delta[0]); err = z_erofs_extent_lookback(&m, m.delta[0]);
if (err) if (err)
goto unmap_out; goto unmap_out;
...@@ -457,10 +559,12 @@ int z_erofs_map_blocks_iter(struct inode *inode, ...@@ -457,10 +559,12 @@ int z_erofs_map_blocks_iter(struct inode *inode,
} }
map->m_llen = end - map->m_la; map->m_llen = end - map->m_la;
map->m_plen = 1 << lclusterbits;
map->m_pa = blknr_to_addr(m.pblk); map->m_pa = blknr_to_addr(m.pblk);
map->m_flags |= EROFS_MAP_MAPPED; map->m_flags |= EROFS_MAP_MAPPED;
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
if (err)
goto out;
unmap_out: unmap_out:
if (m.kaddr) if (m.kaddr)
kunmap_atomic(m.kaddr); kunmap_atomic(m.kaddr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment