Commit 9149fe8b authored by Linus Torvalds

Merge tag 'erofs-for-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "In this cycle, tail-packing data inline for compressed files is now
  supported so that tail pcluster can be stored and read together with
  inode metadata in order to save data I/O and storage space.

  In addition to that, to prepare for the upcoming subpage, folio and
  fscache features, we also introduce meta buffers to get rid of
  erofs_get_meta_page() since it was too close to the page itself.

  In addition, in order to show supported kernel features and control
  sync decompression strategy, new sysfs nodes are introduced in this
  cycle as well.

  Summary:

   - add sysfs interface and a sysfs node to control sync decompression

   - add tail-packing inline support for compressed files

   - get rid of erofs_get_meta_page()"

* tag 'erofs-for-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: use meta buffers for zmap operations
  erofs: use meta buffers for xattr operations
  erofs: use meta buffers for super operations
  erofs: use meta buffers for inode operations
  erofs: introduce meta buffer operations
  erofs: add on-disk compressed tail-packing inline support
  erofs: support inline data decompression
  erofs: support unaligned data decompression
  erofs: introduce z_erofs_fixup_insize
  erofs: tidy up z_erofs_lz4_decompress
  erofs: clean up erofs_map_blocks tracepoints
  erofs: Replace zero-length array with flexible-array member
  erofs: add sysfs node to control sync decompression strategy
  erofs: add sysfs interface
  erofs: rename lz4_0pading to zero_padding
parents 579f3a6d 09c54379
What: /sys/fs/erofs/features/
Date: November 2021
Contact: "Huang Jianan" <huangjianan@oppo.com>
Description: Shows all enabled kernel features.
Supported features:
zero_padding, compr_cfgs, big_pcluster, chunked_file,
device_table, compr_head2, sb_chksum.
What: /sys/fs/erofs/<disk>/sync_decompress
Date: November 2021
Contact: "Huang Jianan" <huangjianan@oppo.com>
Description: Controls the sync decompression strategy:
- 0 (default, auto): enable for readpage, and enable for
readahead in atomic contexts only.
- 1 (force on): enable for readpage and readahead.
- 2 (force off): disable for all situations.
...@@ -93,6 +93,14 @@ dax A legacy option which is an alias for ``dax=always``. ...@@ -93,6 +93,14 @@ dax A legacy option which is an alias for ``dax=always``.
device=%s Specify a path to an extra device to be used together. device=%s Specify a path to an extra device to be used together.
=================== ========================================================= =================== =========================================================
Sysfs Entries
=============
Information about mounted erofs file systems can be found in /sys/fs/erofs.
Each mounted filesystem will have a directory in /sys/fs/erofs based on its
device name (e.g., /sys/fs/erofs/sda).
(see also Documentation/ABI/testing/sysfs-fs-erofs)
On-disk details On-disk details
=============== ===============
......
# SPDX-License-Identifier: GPL-2.0-only # SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_EROFS_FS) += erofs.o obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o sysfs.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
...@@ -12,7 +12,7 @@ struct z_erofs_decompress_req { ...@@ -12,7 +12,7 @@ struct z_erofs_decompress_req {
struct super_block *sb; struct super_block *sb;
struct page **in, **out; struct page **in, **out;
unsigned short pageofs_out; unsigned short pageofs_in, pageofs_out;
unsigned int inputsize, outputsize; unsigned int inputsize, outputsize;
/* indicate the algorithm will be used for decompression */ /* indicate the algorithm will be used for decompression */
...@@ -87,6 +87,8 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, ...@@ -87,6 +87,8 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
return page->mapping == MNGD_MAPPING(sbi); return page->mapping == MNGD_MAPPING(sbi);
} }
int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize);
int z_erofs_decompress(struct z_erofs_decompress_req *rq, int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool); struct page **pagepool);
......
...@@ -9,37 +9,71 @@ ...@@ -9,37 +9,71 @@
#include <linux/dax.h> #include <linux/dax.h>
#include <trace/events/erofs.h> #include <trace/events/erofs.h>
struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr) void erofs_unmap_metabuf(struct erofs_buf *buf)
{
if (buf->kmap_type == EROFS_KMAP)
kunmap(buf->page);
else if (buf->kmap_type == EROFS_KMAP_ATOMIC)
kunmap_atomic(buf->base);
buf->base = NULL;
buf->kmap_type = EROFS_NO_KMAP;
}
void erofs_put_metabuf(struct erofs_buf *buf)
{
	/* nothing to release unless the buffer currently holds a page */
	if (buf->page) {
		/* drop any active mapping before giving the page back */
		erofs_unmap_metabuf(buf);
		put_page(buf->page);
		/* mark the buffer empty so a repeated put is a no-op */
		buf->page = NULL;
	}
}
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
erofs_blk_t blkaddr, enum erofs_kmap_type type)
{ {
struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
struct page *page; erofs_off_t offset = blknr_to_addr(blkaddr);
pgoff_t index = offset >> PAGE_SHIFT;
page = read_cache_page_gfp(mapping, blkaddr, struct page *page = buf->page;
mapping_gfp_constraint(mapping, ~__GFP_FS));
/* should already be PageUptodate */ if (!page || page->index != index) {
if (!IS_ERR(page)) erofs_put_metabuf(buf);
lock_page(page); page = read_cache_page_gfp(mapping, index,
return page; mapping_gfp_constraint(mapping, ~__GFP_FS));
if (IS_ERR(page))
return page;
/* should already be PageUptodate, no need to lock page */
buf->page = page;
}
if (buf->kmap_type == EROFS_NO_KMAP) {
if (type == EROFS_KMAP)
buf->base = kmap(page);
else if (type == EROFS_KMAP_ATOMIC)
buf->base = kmap_atomic(page);
buf->kmap_type = type;
} else if (buf->kmap_type != type) {
DBG_BUGON(1);
return ERR_PTR(-EFAULT);
}
if (type == EROFS_NO_KMAP)
return NULL;
return buf->base + (offset & ~PAGE_MASK);
} }
static int erofs_map_blocks_flatmode(struct inode *inode, static int erofs_map_blocks_flatmode(struct inode *inode,
struct erofs_map_blocks *map, struct erofs_map_blocks *map,
int flags) int flags)
{ {
int err = 0;
erofs_blk_t nblocks, lastblk; erofs_blk_t nblocks, lastblk;
u64 offset = map->m_la; u64 offset = map->m_la;
struct erofs_inode *vi = EROFS_I(inode); struct erofs_inode *vi = EROFS_I(inode);
bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
trace_erofs_map_blocks_flatmode_enter(inode, map, flags); nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
lastblk = nblocks - tailendpacking; lastblk = nblocks - tailendpacking;
/* there is no hole in flatmode */ /* there is no hole in flatmode */
map->m_flags = EROFS_MAP_MAPPED; map->m_flags = EROFS_MAP_MAPPED;
if (offset < blknr_to_addr(lastblk)) { if (offset < blknr_to_addr(lastblk)) {
map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la; map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
map->m_plen = blknr_to_addr(lastblk) - offset; map->m_plen = blknr_to_addr(lastblk) - offset;
...@@ -51,30 +85,23 @@ static int erofs_map_blocks_flatmode(struct inode *inode, ...@@ -51,30 +85,23 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
vi->xattr_isize + erofs_blkoff(map->m_la); vi->xattr_isize + erofs_blkoff(map->m_la);
map->m_plen = inode->i_size - offset; map->m_plen = inode->i_size - offset;
/* inline data should be located in one meta block */ /* inline data should be located in the same meta block */
if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) { if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) {
erofs_err(inode->i_sb, erofs_err(inode->i_sb,
"inline data cross block boundary @ nid %llu", "inline data cross block boundary @ nid %llu",
vi->nid); vi->nid);
DBG_BUGON(1); DBG_BUGON(1);
err = -EFSCORRUPTED; return -EFSCORRUPTED;
goto err_out;
} }
map->m_flags |= EROFS_MAP_META; map->m_flags |= EROFS_MAP_META;
} else { } else {
erofs_err(inode->i_sb, erofs_err(inode->i_sb,
"internal error @ nid: %llu (size %llu), m_la 0x%llx", "internal error @ nid: %llu (size %llu), m_la 0x%llx",
vi->nid, inode->i_size, map->m_la); vi->nid, inode->i_size, map->m_la);
DBG_BUGON(1); DBG_BUGON(1);
err = -EIO; return -EIO;
goto err_out;
} }
return 0;
map->m_llen = map->m_plen;
err_out:
trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
return err;
} }
static int erofs_map_blocks(struct inode *inode, static int erofs_map_blocks(struct inode *inode,
...@@ -83,12 +110,14 @@ static int erofs_map_blocks(struct inode *inode, ...@@ -83,12 +110,14 @@ static int erofs_map_blocks(struct inode *inode,
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct erofs_inode *vi = EROFS_I(inode); struct erofs_inode *vi = EROFS_I(inode);
struct erofs_inode_chunk_index *idx; struct erofs_inode_chunk_index *idx;
struct page *page; struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
u64 chunknr; u64 chunknr;
unsigned int unit; unsigned int unit;
erofs_off_t pos; erofs_off_t pos;
void *kaddr;
int err = 0; int err = 0;
trace_erofs_map_blocks_enter(inode, map, flags);
map->m_deviceid = 0; map->m_deviceid = 0;
if (map->m_la >= inode->i_size) { if (map->m_la >= inode->i_size) {
/* leave out-of-bound access unmapped */ /* leave out-of-bound access unmapped */
...@@ -97,8 +126,10 @@ static int erofs_map_blocks(struct inode *inode, ...@@ -97,8 +126,10 @@ static int erofs_map_blocks(struct inode *inode,
goto out; goto out;
} }
if (vi->datalayout != EROFS_INODE_CHUNK_BASED) if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
return erofs_map_blocks_flatmode(inode, map, flags); err = erofs_map_blocks_flatmode(inode, map, flags);
goto out;
}
if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
unit = sizeof(*idx); /* chunk index */ unit = sizeof(*idx); /* chunk index */
...@@ -109,17 +140,18 @@ static int erofs_map_blocks(struct inode *inode, ...@@ -109,17 +140,18 @@ static int erofs_map_blocks(struct inode *inode,
pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
vi->xattr_isize, unit) + unit * chunknr; vi->xattr_isize, unit) + unit * chunknr;
page = erofs_get_meta_page(inode->i_sb, erofs_blknr(pos)); kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
if (IS_ERR(page)) if (IS_ERR(kaddr)) {
return PTR_ERR(page); err = PTR_ERR(kaddr);
goto out;
}
map->m_la = chunknr << vi->chunkbits; map->m_la = chunknr << vi->chunkbits;
map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits, map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
roundup(inode->i_size - map->m_la, EROFS_BLKSIZ)); roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));
/* handle block map */ /* handle block map */
if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) { if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
__le32 *blkaddr = page_address(page) + erofs_blkoff(pos); __le32 *blkaddr = kaddr + erofs_blkoff(pos);
if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) { if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
map->m_flags = 0; map->m_flags = 0;
...@@ -130,7 +162,7 @@ static int erofs_map_blocks(struct inode *inode, ...@@ -130,7 +162,7 @@ static int erofs_map_blocks(struct inode *inode,
goto out_unlock; goto out_unlock;
} }
/* parse chunk indexes */ /* parse chunk indexes */
idx = page_address(page) + erofs_blkoff(pos); idx = kaddr + erofs_blkoff(pos);
switch (le32_to_cpu(idx->blkaddr)) { switch (le32_to_cpu(idx->blkaddr)) {
case EROFS_NULL_ADDR: case EROFS_NULL_ADDR:
map->m_flags = 0; map->m_flags = 0;
...@@ -143,10 +175,11 @@ static int erofs_map_blocks(struct inode *inode, ...@@ -143,10 +175,11 @@ static int erofs_map_blocks(struct inode *inode,
break; break;
} }
out_unlock: out_unlock:
unlock_page(page); erofs_put_metabuf(&buf);
put_page(page);
out: out:
map->m_llen = map->m_plen; if (!err)
map->m_llen = map->m_plen;
trace_erofs_map_blocks_exit(inode, map, flags, 0);
return err; return err;
} }
...@@ -231,16 +264,16 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, ...@@ -231,16 +264,16 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
} }
if (map.m_flags & EROFS_MAP_META) { if (map.m_flags & EROFS_MAP_META) {
struct page *ipage; void *ptr;
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
iomap->type = IOMAP_INLINE; iomap->type = IOMAP_INLINE;
ipage = erofs_get_meta_page(inode->i_sb, ptr = erofs_read_metabuf(&buf, inode->i_sb,
erofs_blknr(mdev.m_pa)); erofs_blknr(mdev.m_pa), EROFS_KMAP);
if (IS_ERR(ipage)) if (IS_ERR(ptr))
return PTR_ERR(ipage); return PTR_ERR(ptr);
iomap->inline_data = page_address(ipage) + iomap->inline_data = ptr + erofs_blkoff(mdev.m_pa);
erofs_blkoff(mdev.m_pa); iomap->private = buf.base;
iomap->private = ipage;
} else { } else {
iomap->type = IOMAP_MAPPED; iomap->type = IOMAP_MAPPED;
iomap->addr = mdev.m_pa; iomap->addr = mdev.m_pa;
...@@ -251,12 +284,17 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, ...@@ -251,12 +284,17 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length, static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
ssize_t written, unsigned int flags, struct iomap *iomap) ssize_t written, unsigned int flags, struct iomap *iomap)
{ {
struct page *ipage = iomap->private; void *ptr = iomap->private;
if (ptr) {
struct erofs_buf buf = {
.page = kmap_to_page(ptr),
.base = ptr,
.kmap_type = EROFS_KMAP,
};
if (ipage) {
DBG_BUGON(iomap->type != IOMAP_INLINE); DBG_BUGON(iomap->type != IOMAP_INLINE);
unlock_page(ipage); erofs_put_metabuf(&buf);
put_page(ipage);
} else { } else {
DBG_BUGON(iomap->type == IOMAP_INLINE); DBG_BUGON(iomap->type == IOMAP_INLINE);
} }
......
...@@ -16,6 +16,14 @@ ...@@ -16,6 +16,14 @@
#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32) #define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
#endif #endif
struct z_erofs_lz4_decompress_ctx {
	/* the decompression request this context was built for */
	struct z_erofs_decompress_req *rq;
	/* # of encoded (input) pages */
	unsigned int inpages;
	/* # of decoded (output) pages */
	unsigned int outpages;
	/*
	 * end offset of the decoded data, i.e. pageofs_out + outputsize
	 * (used for in-place decompression)
	 */
	unsigned int oend;
};
int z_erofs_load_lz4_config(struct super_block *sb, int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb, struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int size) struct z_erofs_lz4_cfgs *lz4, int size)
...@@ -56,11 +64,10 @@ int z_erofs_load_lz4_config(struct super_block *sb, ...@@ -56,11 +64,10 @@ int z_erofs_load_lz4_config(struct super_block *sb,
* Fill all gaps with bounce pages if it's a sparse page list. Also check if * Fill all gaps with bounce pages if it's a sparse page list. Also check if
* all physical pages are consecutive, which can be seen for moderate CR. * all physical pages are consecutive, which can be seen for moderate CR.
*/ */
static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq, static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
struct page **pagepool) struct page **pagepool)
{ {
const unsigned int nr = struct z_erofs_decompress_req *rq = ctx->rq;
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL }; struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES, unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
BITS_PER_LONG)] = { 0 }; BITS_PER_LONG)] = { 0 };
...@@ -70,7 +77,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq, ...@@ -70,7 +77,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
unsigned int i, j, top; unsigned int i, j, top;
top = 0; top = 0;
for (i = j = 0; i < nr; ++i, ++j) { for (i = j = 0; i < ctx->outpages; ++i, ++j) {
struct page *const page = rq->out[i]; struct page *const page = rq->out[i];
struct page *victim; struct page *victim;
...@@ -112,41 +119,36 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq, ...@@ -112,41 +119,36 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
return kaddr ? 1 : 0; return kaddr ? 1 : 0;
} }
static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq, static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
void *inpage, unsigned int *inputmargin, int *maptype, void *inpage, unsigned int *inputmargin, int *maptype,
bool support_0padding) bool may_inplace)
{ {
unsigned int nrpages_in, nrpages_out; struct z_erofs_decompress_req *rq = ctx->rq;
unsigned int ofull, oend, inputsize, total, i, j; unsigned int omargin, total, i, j;
struct page **in; struct page **in;
void *src, *tmp; void *src, *tmp;
inputsize = rq->inputsize;
nrpages_in = PAGE_ALIGN(inputsize) >> PAGE_SHIFT;
oend = rq->pageofs_out + rq->outputsize;
ofull = PAGE_ALIGN(oend);
nrpages_out = ofull >> PAGE_SHIFT;
if (rq->inplace_io) { if (rq->inplace_io) {
if (rq->partial_decoding || !support_0padding || omargin = PAGE_ALIGN(ctx->oend) - ctx->oend;
ofull - oend < LZ4_DECOMPRESS_INPLACE_MARGIN(inputsize)) if (rq->partial_decoding || !may_inplace ||
omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize))
goto docopy; goto docopy;
for (i = 0; i < nrpages_in; ++i) { for (i = 0; i < ctx->inpages; ++i) {
DBG_BUGON(rq->in[i] == NULL); DBG_BUGON(rq->in[i] == NULL);
for (j = 0; j < nrpages_out - nrpages_in + i; ++j) for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j)
if (rq->out[j] == rq->in[i]) if (rq->out[j] == rq->in[i])
goto docopy; goto docopy;
} }
} }
if (nrpages_in <= 1) { if (ctx->inpages <= 1) {
*maptype = 0; *maptype = 0;
return inpage; return inpage;
} }
kunmap_atomic(inpage); kunmap_atomic(inpage);
might_sleep(); might_sleep();
src = erofs_vm_map_ram(rq->in, nrpages_in); src = erofs_vm_map_ram(rq->in, ctx->inpages);
if (!src) if (!src)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
*maptype = 1; *maptype = 1;
...@@ -155,7 +157,7 @@ static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq, ...@@ -155,7 +157,7 @@ static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq,
docopy: docopy:
/* Or copy compressed data which can be overlapped to per-CPU buffer */ /* Or copy compressed data which can be overlapped to per-CPU buffer */
in = rq->in; in = rq->in;
src = erofs_get_pcpubuf(nrpages_in); src = erofs_get_pcpubuf(ctx->inpages);
if (!src) { if (!src) {
DBG_BUGON(1); DBG_BUGON(1);
kunmap_atomic(inpage); kunmap_atomic(inpage);
...@@ -182,36 +184,53 @@ static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq, ...@@ -182,36 +184,53 @@ static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq,
return src; return src;
} }
static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, /*
* Get the exact inputsize with zero_padding feature.
* - For LZ4, it should work if zero_padding feature is on (5.3+);
* - For MicroLZMA, it'd be enabled all the time.
*/
int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize)
{
const char *padend;
padend = memchr_inv(padbuf, 0, padbufsize);
if (!padend)
return -EFSCORRUPTED;
rq->inputsize -= padend - padbuf;
rq->pageofs_in += padend - padbuf;
return 0;
}
static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
u8 *out) u8 *out)
{ {
struct z_erofs_decompress_req *rq = ctx->rq;
bool support_0padding = false, may_inplace = false;
unsigned int inputmargin; unsigned int inputmargin;
u8 *headpage, *src; u8 *headpage, *src;
bool support_0padding;
int ret, maptype; int ret, maptype;
DBG_BUGON(*rq->in == NULL); DBG_BUGON(*rq->in == NULL);
headpage = kmap_atomic(*rq->in); headpage = kmap_atomic(*rq->in);
inputmargin = 0;
support_0padding = false;
/* decompression inplace is only safe when 0padding is enabled */ /* LZ4 decompression inplace is only safe if zero_padding is enabled */
if (erofs_sb_has_lz4_0padding(EROFS_SB(rq->sb))) { if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) {
support_0padding = true; support_0padding = true;
ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
while (!headpage[inputmargin & ~PAGE_MASK]) min_t(unsigned int, rq->inputsize,
if (!(++inputmargin & ~PAGE_MASK)) EROFS_BLKSIZ - rq->pageofs_in));
break; if (ret) {
if (inputmargin >= rq->inputsize) {
kunmap_atomic(headpage); kunmap_atomic(headpage);
return -EIO; return ret;
} }
may_inplace = !((rq->pageofs_in + rq->inputsize) &
(EROFS_BLKSIZ - 1));
} }
rq->inputsize -= inputmargin; inputmargin = rq->pageofs_in;
src = z_erofs_lz4_handle_inplace_io(rq, headpage, &inputmargin, src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin,
&maptype, support_0padding); &maptype, may_inplace);
if (IS_ERR(src)) if (IS_ERR(src))
return PTR_ERR(src); return PTR_ERR(src);
...@@ -240,9 +259,9 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, ...@@ -240,9 +259,9 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq,
} }
if (maptype == 0) { if (maptype == 0) {
kunmap_atomic(src); kunmap_atomic(headpage);
} else if (maptype == 1) { } else if (maptype == 1) {
vm_unmap_ram(src, PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT); vm_unmap_ram(src, ctx->inpages);
} else if (maptype == 2) { } else if (maptype == 2) {
erofs_put_pcpubuf(src); erofs_put_pcpubuf(src);
} else { } else {
...@@ -255,14 +274,18 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, ...@@ -255,14 +274,18 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq,
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool) struct page **pagepool)
{ {
const unsigned int nrpages_out = struct z_erofs_lz4_decompress_ctx ctx;
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
unsigned int dst_maptype; unsigned int dst_maptype;
void *dst; void *dst;
int ret; int ret;
ctx.rq = rq;
ctx.oend = rq->pageofs_out + rq->outputsize;
ctx.outpages = PAGE_ALIGN(ctx.oend) >> PAGE_SHIFT;
ctx.inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
/* one optimized fast path only for non bigpcluster cases yet */ /* one optimized fast path only for non bigpcluster cases yet */
if (rq->inputsize <= PAGE_SIZE && nrpages_out == 1 && !rq->inplace_io) { if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) {
DBG_BUGON(!*rq->out); DBG_BUGON(!*rq->out);
dst = kmap_atomic(*rq->out); dst = kmap_atomic(*rq->out);
dst_maptype = 0; dst_maptype = 0;
...@@ -270,27 +293,25 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, ...@@ -270,27 +293,25 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
} }
/* general decoding path which can be used for all cases */ /* general decoding path which can be used for all cases */
ret = z_erofs_lz4_prepare_dstpages(rq, pagepool); ret = z_erofs_lz4_prepare_dstpages(&ctx, pagepool);
if (ret < 0) if (ret < 0) {
return ret; return ret;
if (ret) { } else if (ret > 0) {
dst = page_address(*rq->out); dst = page_address(*rq->out);
dst_maptype = 1; dst_maptype = 1;
goto dstmap_out; } else {
dst = erofs_vm_map_ram(rq->out, ctx.outpages);
if (!dst)
return -ENOMEM;
dst_maptype = 2;
} }
dst = erofs_vm_map_ram(rq->out, nrpages_out);
if (!dst)
return -ENOMEM;
dst_maptype = 2;
dstmap_out: dstmap_out:
ret = z_erofs_lz4_decompress_mem(rq, dst + rq->pageofs_out); ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out);
if (!dst_maptype) if (!dst_maptype)
kunmap_atomic(dst); kunmap_atomic(dst);
else if (dst_maptype == 2) else if (dst_maptype == 2)
vm_unmap_ram(dst, nrpages_out); vm_unmap_ram(dst, ctx.outpages);
return ret; return ret;
} }
...@@ -299,7 +320,8 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq, ...@@ -299,7 +320,8 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq,
{ {
const unsigned int nrpages_out = const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const unsigned int righthalf = PAGE_SIZE - rq->pageofs_out; const unsigned int righthalf = min_t(unsigned int, rq->outputsize,
PAGE_SIZE - rq->pageofs_out);
unsigned char *src, *dst; unsigned char *src, *dst;
if (nrpages_out > 2) { if (nrpages_out > 2) {
...@@ -312,7 +334,7 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq, ...@@ -312,7 +334,7 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq,
return 0; return 0;
} }
src = kmap_atomic(*rq->in); src = kmap_atomic(*rq->in) + rq->pageofs_in;
if (rq->out[0]) { if (rq->out[0]) {
dst = kmap_atomic(rq->out[0]); dst = kmap_atomic(rq->out[0]);
memcpy(dst + rq->pageofs_out, src, righthalf); memcpy(dst + rq->pageofs_out, src, righthalf);
......
...@@ -156,7 +156,7 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, ...@@ -156,7 +156,7 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const unsigned int nrpages_in = const unsigned int nrpages_in =
PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
unsigned int inputmargin, inlen, outlen, pageofs; unsigned int inlen, outlen, pageofs;
struct z_erofs_lzma *strm; struct z_erofs_lzma *strm;
u8 *kin; u8 *kin;
bool bounced = false; bool bounced = false;
...@@ -164,16 +164,13 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, ...@@ -164,16 +164,13 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
/* 1. get the exact LZMA compressed size */ /* 1. get the exact LZMA compressed size */
kin = kmap(*rq->in); kin = kmap(*rq->in);
inputmargin = 0; err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
while (!kin[inputmargin & ~PAGE_MASK]) min_t(unsigned int, rq->inputsize,
if (!(++inputmargin & ~PAGE_MASK)) EROFS_BLKSIZ - rq->pageofs_in));
break; if (err) {
if (inputmargin >= PAGE_SIZE) {
kunmap(*rq->in); kunmap(*rq->in);
return -EFSCORRUPTED; return err;
} }
rq->inputsize -= inputmargin;
/* 2. get an available lzma context */ /* 2. get an available lzma context */
again: again:
...@@ -193,9 +190,9 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, ...@@ -193,9 +190,9 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
xz_dec_microlzma_reset(strm->state, inlen, outlen, xz_dec_microlzma_reset(strm->state, inlen, outlen,
!rq->partial_decoding); !rq->partial_decoding);
pageofs = rq->pageofs_out; pageofs = rq->pageofs_out;
strm->buf.in = kin + inputmargin; strm->buf.in = kin + rq->pageofs_in;
strm->buf.in_pos = 0; strm->buf.in_pos = 0;
strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin); strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - rq->pageofs_in);
inlen -= strm->buf.in_size; inlen -= strm->buf.in_size;
strm->buf.out = NULL; strm->buf.out = NULL;
strm->buf.out_pos = 0; strm->buf.out_pos = 0;
......
...@@ -17,19 +17,21 @@ ...@@ -17,19 +17,21 @@
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
* be incompatible with this kernel version. * be incompatible with this kernel version.
*/ */
#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001 #define EROFS_FEATURE_INCOMPAT_ZERO_PADDING 0x00000001
#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002 #define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002 #define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004 #define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008 #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008 #define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008
#define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010
#define EROFS_ALL_FEATURE_INCOMPAT \ #define EROFS_ALL_FEATURE_INCOMPAT \
(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \ (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \ EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \ EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
EROFS_FEATURE_INCOMPAT_COMPR_HEAD2) EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
#define EROFS_SB_EXTSLOT_SIZE 16 #define EROFS_SB_EXTSLOT_SIZE 16
...@@ -209,7 +211,7 @@ struct erofs_xattr_ibody_header { ...@@ -209,7 +211,7 @@ struct erofs_xattr_ibody_header {
__le32 h_reserved; __le32 h_reserved;
__u8 h_shared_count; __u8 h_shared_count;
__u8 h_reserved2[7]; __u8 h_reserved2[7];
__le32 h_shared_xattrs[0]; /* shared xattr id array */ __le32 h_shared_xattrs[]; /* shared xattr id array */
}; };
/* Name indexes */ /* Name indexes */
...@@ -226,7 +228,7 @@ struct erofs_xattr_entry { ...@@ -226,7 +228,7 @@ struct erofs_xattr_entry {
__u8 e_name_index; /* attribute name index */ __u8 e_name_index; /* attribute name index */
__le16 e_value_size; /* size of attribute value */ __le16 e_value_size; /* size of attribute value */
/* followed by e_name and e_value */ /* followed by e_name and e_value */
char e_name[0]; /* attribute name */ char e_name[]; /* attribute name */
}; };
static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount) static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount)
...@@ -292,13 +294,17 @@ struct z_erofs_lzma_cfgs { ...@@ -292,13 +294,17 @@ struct z_erofs_lzma_cfgs {
* (4B) + 2B + (4B) if compacted 2B is on. * (4B) + 2B + (4B) if compacted 2B is on.
* bit 1 : HEAD1 big pcluster (0 - off; 1 - on) * bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
* bit 2 : HEAD2 big pcluster (0 - off; 1 - on) * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
* bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
*/ */
#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001 #define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002 #define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004 #define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008
struct z_erofs_map_header { struct z_erofs_map_header {
__le32 h_reserved1; __le16 h_reserved1;
/* indicates the encoded size of tailpacking data */
__le16 h_idata_size;
__le16 h_advise; __le16 h_advise;
/* /*
* bit 0-3 : algorithm type of head 1 (logical cluster type 01); * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
......
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
* the inode payload page if it's an extended inode) in order to fill * the inode payload page if it's an extended inode) in order to fill
* inline data if possible. * inline data if possible.
*/ */
static struct page *erofs_read_inode(struct inode *inode, static void *erofs_read_inode(struct erofs_buf *buf,
unsigned int *ofs) struct inode *inode, unsigned int *ofs)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_sb_info *sbi = EROFS_SB(sb);
...@@ -22,7 +22,7 @@ static struct page *erofs_read_inode(struct inode *inode, ...@@ -22,7 +22,7 @@ static struct page *erofs_read_inode(struct inode *inode,
const erofs_off_t inode_loc = iloc(sbi, vi->nid); const erofs_off_t inode_loc = iloc(sbi, vi->nid);
erofs_blk_t blkaddr, nblks = 0; erofs_blk_t blkaddr, nblks = 0;
struct page *page; void *kaddr;
struct erofs_inode_compact *dic; struct erofs_inode_compact *dic;
struct erofs_inode_extended *die, *copied = NULL; struct erofs_inode_extended *die, *copied = NULL;
unsigned int ifmt; unsigned int ifmt;
...@@ -34,14 +34,14 @@ static struct page *erofs_read_inode(struct inode *inode, ...@@ -34,14 +34,14 @@ static struct page *erofs_read_inode(struct inode *inode,
erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u", erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u",
__func__, vi->nid, *ofs, blkaddr); __func__, vi->nid, *ofs, blkaddr);
page = erofs_get_meta_page(sb, blkaddr); kaddr = erofs_read_metabuf(buf, sb, blkaddr, EROFS_KMAP);
if (IS_ERR(page)) { if (IS_ERR(kaddr)) {
erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld", erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld",
vi->nid, PTR_ERR(page)); vi->nid, PTR_ERR(kaddr));
return page; return kaddr;
} }
dic = page_address(page) + *ofs; dic = kaddr + *ofs;
ifmt = le16_to_cpu(dic->i_format); ifmt = le16_to_cpu(dic->i_format);
if (ifmt & ~EROFS_I_ALL) { if (ifmt & ~EROFS_I_ALL) {
...@@ -62,12 +62,12 @@ static struct page *erofs_read_inode(struct inode *inode, ...@@ -62,12 +62,12 @@ static struct page *erofs_read_inode(struct inode *inode,
switch (erofs_inode_version(ifmt)) { switch (erofs_inode_version(ifmt)) {
case EROFS_INODE_LAYOUT_EXTENDED: case EROFS_INODE_LAYOUT_EXTENDED:
vi->inode_isize = sizeof(struct erofs_inode_extended); vi->inode_isize = sizeof(struct erofs_inode_extended);
/* check if the inode acrosses page boundary */ /* check if the extended inode acrosses block boundary */
if (*ofs + vi->inode_isize <= PAGE_SIZE) { if (*ofs + vi->inode_isize <= EROFS_BLKSIZ) {
*ofs += vi->inode_isize; *ofs += vi->inode_isize;
die = (struct erofs_inode_extended *)dic; die = (struct erofs_inode_extended *)dic;
} else { } else {
const unsigned int gotten = PAGE_SIZE - *ofs; const unsigned int gotten = EROFS_BLKSIZ - *ofs;
copied = kmalloc(vi->inode_isize, GFP_NOFS); copied = kmalloc(vi->inode_isize, GFP_NOFS);
if (!copied) { if (!copied) {
...@@ -75,18 +75,16 @@ static struct page *erofs_read_inode(struct inode *inode, ...@@ -75,18 +75,16 @@ static struct page *erofs_read_inode(struct inode *inode,
goto err_out; goto err_out;
} }
memcpy(copied, dic, gotten); memcpy(copied, dic, gotten);
unlock_page(page); kaddr = erofs_read_metabuf(buf, sb, blkaddr + 1,
put_page(page); EROFS_KMAP);
if (IS_ERR(kaddr)) {
page = erofs_get_meta_page(sb, blkaddr + 1); erofs_err(sb, "failed to get inode payload block (nid: %llu), err %ld",
if (IS_ERR(page)) { vi->nid, PTR_ERR(kaddr));
erofs_err(sb, "failed to get inode payload page (nid: %llu), err %ld",
vi->nid, PTR_ERR(page));
kfree(copied); kfree(copied);
return page; return kaddr;
} }
*ofs = vi->inode_isize - gotten; *ofs = vi->inode_isize - gotten;
memcpy((u8 *)copied + gotten, page_address(page), *ofs); memcpy((u8 *)copied + gotten, kaddr, *ofs);
die = copied; die = copied;
} }
vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
...@@ -200,7 +198,7 @@ static struct page *erofs_read_inode(struct inode *inode, ...@@ -200,7 +198,7 @@ static struct page *erofs_read_inode(struct inode *inode,
inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9; inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9;
else else
inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK; inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK;
return page; return kaddr;
bogusimode: bogusimode:
erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu", erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu",
...@@ -209,12 +207,11 @@ static struct page *erofs_read_inode(struct inode *inode, ...@@ -209,12 +207,11 @@ static struct page *erofs_read_inode(struct inode *inode,
err_out: err_out:
DBG_BUGON(1); DBG_BUGON(1);
kfree(copied); kfree(copied);
unlock_page(page); erofs_put_metabuf(buf);
put_page(page);
return ERR_PTR(err); return ERR_PTR(err);
} }
static int erofs_fill_symlink(struct inode *inode, void *data, static int erofs_fill_symlink(struct inode *inode, void *kaddr,
unsigned int m_pofs) unsigned int m_pofs)
{ {
struct erofs_inode *vi = EROFS_I(inode); struct erofs_inode *vi = EROFS_I(inode);
...@@ -222,7 +219,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data, ...@@ -222,7 +219,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data,
/* if it cannot be handled with fast symlink scheme */ /* if it cannot be handled with fast symlink scheme */
if (vi->datalayout != EROFS_INODE_FLAT_INLINE || if (vi->datalayout != EROFS_INODE_FLAT_INLINE ||
inode->i_size >= PAGE_SIZE) { inode->i_size >= EROFS_BLKSIZ) {
inode->i_op = &erofs_symlink_iops; inode->i_op = &erofs_symlink_iops;
return 0; return 0;
} }
...@@ -232,8 +229,8 @@ static int erofs_fill_symlink(struct inode *inode, void *data, ...@@ -232,8 +229,8 @@ static int erofs_fill_symlink(struct inode *inode, void *data,
return -ENOMEM; return -ENOMEM;
m_pofs += vi->xattr_isize; m_pofs += vi->xattr_isize;
/* inline symlink data shouldn't cross page boundary as well */ /* inline symlink data shouldn't cross block boundary */
if (m_pofs + inode->i_size > PAGE_SIZE) { if (m_pofs + inode->i_size > EROFS_BLKSIZ) {
kfree(lnk); kfree(lnk);
erofs_err(inode->i_sb, erofs_err(inode->i_sb,
"inline data cross block boundary @ nid %llu", "inline data cross block boundary @ nid %llu",
...@@ -241,8 +238,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data, ...@@ -241,8 +238,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data,
DBG_BUGON(1); DBG_BUGON(1);
return -EFSCORRUPTED; return -EFSCORRUPTED;
} }
memcpy(lnk, kaddr + m_pofs, inode->i_size);
memcpy(lnk, data + m_pofs, inode->i_size);
lnk[inode->i_size] = '\0'; lnk[inode->i_size] = '\0';
inode->i_link = lnk; inode->i_link = lnk;
...@@ -253,16 +249,17 @@ static int erofs_fill_symlink(struct inode *inode, void *data, ...@@ -253,16 +249,17 @@ static int erofs_fill_symlink(struct inode *inode, void *data,
static int erofs_fill_inode(struct inode *inode, int isdir) static int erofs_fill_inode(struct inode *inode, int isdir)
{ {
struct erofs_inode *vi = EROFS_I(inode); struct erofs_inode *vi = EROFS_I(inode);
struct page *page; struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
void *kaddr;
unsigned int ofs; unsigned int ofs;
int err = 0; int err = 0;
trace_erofs_fill_inode(inode, isdir); trace_erofs_fill_inode(inode, isdir);
/* read inode base data from disk */ /* read inode base data from disk */
page = erofs_read_inode(inode, &ofs); kaddr = erofs_read_inode(&buf, inode, &ofs);
if (IS_ERR(page)) if (IS_ERR(kaddr))
return PTR_ERR(page); return PTR_ERR(kaddr);
/* setup the new inode */ /* setup the new inode */
switch (inode->i_mode & S_IFMT) { switch (inode->i_mode & S_IFMT) {
...@@ -278,7 +275,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir) ...@@ -278,7 +275,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir)
inode->i_fop = &erofs_dir_fops; inode->i_fop = &erofs_dir_fops;
break; break;
case S_IFLNK: case S_IFLNK:
err = erofs_fill_symlink(inode, page_address(page), ofs); err = erofs_fill_symlink(inode, kaddr, ofs);
if (err) if (err)
goto out_unlock; goto out_unlock;
inode_nohighmem(inode); inode_nohighmem(inode);
...@@ -302,8 +299,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir) ...@@ -302,8 +299,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir)
inode->i_mapping->a_ops = &erofs_raw_access_aops; inode->i_mapping->a_ops = &erofs_raw_access_aops;
out_unlock: out_unlock:
unlock_page(page); erofs_put_metabuf(&buf);
put_page(page);
return err; return err;
} }
......
...@@ -56,12 +56,18 @@ struct erofs_device_info { ...@@ -56,12 +56,18 @@ struct erofs_device_info {
u32 mapped_blkaddr; u32 mapped_blkaddr;
}; };
/*
 * Values accepted for erofs_mount_opts.sync_decompress
 * (0 - auto, 1 - force on, 2 - force off).
 */
enum {
EROFS_SYNC_DECOMPRESS_AUTO,
EROFS_SYNC_DECOMPRESS_FORCE_ON,
EROFS_SYNC_DECOMPRESS_FORCE_OFF
};
struct erofs_mount_opts { struct erofs_mount_opts {
#ifdef CONFIG_EROFS_FS_ZIP #ifdef CONFIG_EROFS_FS_ZIP
/* current strategy of how to use managed cache */ /* current strategy of how to use managed cache */
unsigned char cache_strategy; unsigned char cache_strategy;
/* strategy of sync decompression (false - auto, true - force on) */ /* strategy of sync decompression (0 - auto, 1 - force on, 2 - force off) */
bool readahead_sync_decompress; unsigned int sync_decompress;
/* threshold for decompression synchronously */ /* threshold for decompression synchronously */
unsigned int max_sync_decompress_pages; unsigned int max_sync_decompress_pages;
...@@ -134,6 +140,10 @@ struct erofs_sb_info { ...@@ -134,6 +140,10 @@ struct erofs_sb_info {
u8 volume_name[16]; /* volume name */ u8 volume_name[16]; /* volume name */
u32 feature_compat; u32 feature_compat;
u32 feature_incompat; u32 feature_incompat;
/* sysfs support */
struct kobject s_kobj; /* /sys/fs/erofs/<devname> */
struct completion s_kobj_unregister;
}; };
#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
...@@ -241,6 +251,19 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) ...@@ -241,6 +251,19 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
#error erofs cannot be used in this platform #error erofs cannot be used in this platform
#endif #endif
/*
 * Mapping mode requested from erofs_read_metabuf() and recorded in
 * struct erofs_buf.kmap_type. EROFS_NO_KMAP is the first enumerator (0).
 */
enum erofs_kmap_type {
EROFS_NO_KMAP, /* don't map the buffer */
EROFS_KMAP, /* use kmap() to map the buffer */
EROFS_KMAP_ATOMIC, /* use kmap_atomic() to map the buffer */
};
/*
 * Meta buffer handle passed to erofs_read_metabuf(), erofs_unmap_metabuf()
 * and erofs_put_metabuf().
 */
struct erofs_buf {
/* presumably the page currently backing the buffer; NULL when empty -- TODO confirm */
struct page *page;
/* presumably the mapped address of @page, if mapped -- TODO confirm */
void *base;
/* presumably how @base was mapped (see enum erofs_kmap_type) -- TODO confirm */
enum erofs_kmap_type kmap_type;
};
/*
 * Empty buffer: only .page is named; as a compound literal the remaining
 * members are zero-initialized (base == NULL, kmap_type == EROFS_NO_KMAP).
 */
#define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL })
#define ROOT_NID(sb) ((sb)->root_nid) #define ROOT_NID(sb) ((sb)->root_nid)
#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ) #define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ)
...@@ -258,10 +281,13 @@ static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \ ...@@ -258,10 +281,13 @@ static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \
return sbi->feature_##compat & EROFS_FEATURE_##feature; \ return sbi->feature_##compat & EROFS_FEATURE_##feature; \
} }
EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING) EROFS_FEATURE_FUNCS(zero_padding, incompat, INCOMPAT_ZERO_PADDING)
EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS) EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER) EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE) EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2)
EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
/* atomic flag definitions */ /* atomic flag definitions */
...@@ -296,6 +322,9 @@ struct erofs_inode { ...@@ -296,6 +322,9 @@ struct erofs_inode {
unsigned short z_advise; unsigned short z_advise;
unsigned char z_algorithmtype[2]; unsigned char z_algorithmtype[2];
unsigned char z_logical_clusterbits; unsigned char z_logical_clusterbits;
unsigned long z_tailextent_headlcn;
unsigned int z_idataoff;
unsigned short z_idata_size;
}; };
#endif /* CONFIG_EROFS_FS_ZIP */ #endif /* CONFIG_EROFS_FS_ZIP */
}; };
...@@ -390,14 +419,14 @@ enum { ...@@ -390,14 +419,14 @@ enum {
#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped) #define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
struct erofs_map_blocks { struct erofs_map_blocks {
struct erofs_buf buf;
erofs_off_t m_pa, m_la; erofs_off_t m_pa, m_la;
u64 m_plen, m_llen; u64 m_plen, m_llen;
unsigned short m_deviceid; unsigned short m_deviceid;
char m_algorithmformat; char m_algorithmformat;
unsigned int m_flags; unsigned int m_flags;
struct page *mpage;
}; };
/* Flags used by erofs_map_blocks_flatmode() */ /* Flags used by erofs_map_blocks_flatmode() */
...@@ -409,6 +438,8 @@ struct erofs_map_blocks { ...@@ -409,6 +438,8 @@ struct erofs_map_blocks {
#define EROFS_GET_BLOCKS_FIEMAP 0x0002 #define EROFS_GET_BLOCKS_FIEMAP 0x0002
/* Used to map the whole extent if non-negligible data is requested for LZMA */ /* Used to map the whole extent if non-negligible data is requested for LZMA */
#define EROFS_GET_BLOCKS_READMORE 0x0004 #define EROFS_GET_BLOCKS_READMORE 0x0004
/* Used to map tail extent for tailpacking inline pcluster */
#define EROFS_GET_BLOCKS_FINDTAIL 0x0008
enum { enum {
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
...@@ -443,7 +474,10 @@ struct erofs_map_dev { ...@@ -443,7 +474,10 @@ struct erofs_map_dev {
/* data.c */ /* data.c */
extern const struct file_operations erofs_file_fops; extern const struct file_operations erofs_file_fops;
struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr); void erofs_unmap_metabuf(struct erofs_buf *buf);
void erofs_put_metabuf(struct erofs_buf *buf);
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
erofs_blk_t blkaddr, enum erofs_kmap_type type);
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len); u64 start, u64 len);
...@@ -498,6 +532,12 @@ int erofs_pcpubuf_growsize(unsigned int nrpages); ...@@ -498,6 +532,12 @@ int erofs_pcpubuf_growsize(unsigned int nrpages);
void erofs_pcpubuf_init(void); void erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void); void erofs_pcpubuf_exit(void);
/* sysfs.c */
int erofs_register_sysfs(struct super_block *sb);
void erofs_unregister_sysfs(struct super_block *sb);
int __init erofs_init_sysfs(void);
void erofs_exit_sysfs(void);
/* utils.c / zdata.c */ /* utils.c / zdata.c */
struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp); struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp);
static inline void erofs_pagepool_add(struct page **pagepool, static inline void erofs_pagepool_add(struct page **pagepool,
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
/* /*
* Copyright (C) 2017-2018 HUAWEI, Inc. * Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/ * https://www.huawei.com/
* Copyright (C) 2021, Alibaba Cloud
*/ */
#include <linux/module.h> #include <linux/module.h>
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
...@@ -124,80 +125,50 @@ static bool check_layout_compatibility(struct super_block *sb, ...@@ -124,80 +125,50 @@ static bool check_layout_compatibility(struct super_block *sb,
#ifdef CONFIG_EROFS_FS_ZIP #ifdef CONFIG_EROFS_FS_ZIP
/* read variable-sized metadata, offset will be aligned by 4-byte */ /* read variable-sized metadata, offset will be aligned by 4-byte */
static void *erofs_read_metadata(struct super_block *sb, struct page **pagep, static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
erofs_off_t *offset, int *lengthp) erofs_off_t *offset, int *lengthp)
{ {
struct page *page = *pagep;
u8 *buffer, *ptr; u8 *buffer, *ptr;
int len, i, cnt; int len, i, cnt;
erofs_blk_t blk;
*offset = round_up(*offset, 4); *offset = round_up(*offset, 4);
blk = erofs_blknr(*offset); ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset), EROFS_KMAP);
if (IS_ERR(ptr))
return ptr;
if (!page || page->index != blk) {
if (page) {
unlock_page(page);
put_page(page);
}
page = erofs_get_meta_page(sb, blk);
if (IS_ERR(page))
goto err_nullpage;
}
ptr = kmap(page);
len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]); len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]);
if (!len) if (!len)
len = U16_MAX + 1; len = U16_MAX + 1;
buffer = kmalloc(len, GFP_KERNEL); buffer = kmalloc(len, GFP_KERNEL);
if (!buffer) { if (!buffer)
buffer = ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
goto out;
}
*offset += sizeof(__le16); *offset += sizeof(__le16);
*lengthp = len; *lengthp = len;
for (i = 0; i < len; i += cnt) { for (i = 0; i < len; i += cnt) {
cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i); cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i);
blk = erofs_blknr(*offset); ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset),
EROFS_KMAP);
if (!page || page->index != blk) { if (IS_ERR(ptr)) {
if (page) { kfree(buffer);
kunmap(page); return ptr;
unlock_page(page);
put_page(page);
}
page = erofs_get_meta_page(sb, blk);
if (IS_ERR(page)) {
kfree(buffer);
goto err_nullpage;
}
ptr = kmap(page);
} }
memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt); memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt);
*offset += cnt; *offset += cnt;
} }
out:
kunmap(page);
*pagep = page;
return buffer; return buffer;
err_nullpage:
*pagep = NULL;
return page;
} }
static int erofs_load_compr_cfgs(struct super_block *sb, static int erofs_load_compr_cfgs(struct super_block *sb,
struct erofs_super_block *dsb) struct erofs_super_block *dsb)
{ {
struct erofs_sb_info *sbi; struct erofs_sb_info *sbi = EROFS_SB(sb);
struct page *page; struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
unsigned int algs, alg; unsigned int algs, alg;
erofs_off_t offset; erofs_off_t offset;
int size, ret; int size, ret = 0;
sbi = EROFS_SB(sb);
sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs);
if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) {
erofs_err(sb, "try to load compressed fs with unsupported algorithms %x", erofs_err(sb, "try to load compressed fs with unsupported algorithms %x",
sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS);
...@@ -205,20 +176,17 @@ static int erofs_load_compr_cfgs(struct super_block *sb, ...@@ -205,20 +176,17 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
} }
offset = EROFS_SUPER_OFFSET + sbi->sb_size; offset = EROFS_SUPER_OFFSET + sbi->sb_size;
page = NULL;
alg = 0; alg = 0;
ret = 0;
for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {
void *data; void *data;
if (!(algs & 1)) if (!(algs & 1))
continue; continue;
data = erofs_read_metadata(sb, &page, &offset, &size); data = erofs_read_metadata(sb, &buf, &offset, &size);
if (IS_ERR(data)) { if (IS_ERR(data)) {
ret = PTR_ERR(data); ret = PTR_ERR(data);
goto err; break;
} }
switch (alg) { switch (alg) {
...@@ -234,13 +202,9 @@ static int erofs_load_compr_cfgs(struct super_block *sb, ...@@ -234,13 +202,9 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
} }
kfree(data); kfree(data);
if (ret) if (ret)
goto err; break;
}
err:
if (page) {
unlock_page(page);
put_page(page);
} }
erofs_put_metabuf(&buf);
return ret; return ret;
} }
#else #else
...@@ -261,7 +225,7 @@ static int erofs_init_devices(struct super_block *sb, ...@@ -261,7 +225,7 @@ static int erofs_init_devices(struct super_block *sb,
struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_sb_info *sbi = EROFS_SB(sb);
unsigned int ondisk_extradevs; unsigned int ondisk_extradevs;
erofs_off_t pos; erofs_off_t pos;
struct page *page = NULL; struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_device_info *dif; struct erofs_device_info *dif;
struct erofs_deviceslot *dis; struct erofs_deviceslot *dis;
void *ptr; void *ptr;
...@@ -285,22 +249,13 @@ static int erofs_init_devices(struct super_block *sb, ...@@ -285,22 +249,13 @@ static int erofs_init_devices(struct super_block *sb,
pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE; pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
down_read(&sbi->devs->rwsem); down_read(&sbi->devs->rwsem);
idr_for_each_entry(&sbi->devs->tree, dif, id) { idr_for_each_entry(&sbi->devs->tree, dif, id) {
erofs_blk_t blk = erofs_blknr(pos);
struct block_device *bdev; struct block_device *bdev;
if (!page || page->index != blk) { ptr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos),
if (page) { EROFS_KMAP);
kunmap(page); if (IS_ERR(ptr)) {
unlock_page(page); err = PTR_ERR(ptr);
put_page(page); break;
}
page = erofs_get_meta_page(sb, blk);
if (IS_ERR(page)) {
up_read(&sbi->devs->rwsem);
return PTR_ERR(page);
}
ptr = kmap(page);
} }
dis = ptr + erofs_blkoff(pos); dis = ptr + erofs_blkoff(pos);
...@@ -309,7 +264,7 @@ static int erofs_init_devices(struct super_block *sb, ...@@ -309,7 +264,7 @@ static int erofs_init_devices(struct super_block *sb,
sb->s_type); sb->s_type);
if (IS_ERR(bdev)) { if (IS_ERR(bdev)) {
err = PTR_ERR(bdev); err = PTR_ERR(bdev);
goto err_out; break;
} }
dif->bdev = bdev; dif->bdev = bdev;
dif->dax_dev = fs_dax_get_by_bdev(bdev); dif->dax_dev = fs_dax_get_by_bdev(bdev);
...@@ -318,13 +273,8 @@ static int erofs_init_devices(struct super_block *sb, ...@@ -318,13 +273,8 @@ static int erofs_init_devices(struct super_block *sb,
sbi->total_blocks += dif->blocks; sbi->total_blocks += dif->blocks;
pos += EROFS_DEVT_SLOT_SIZE; pos += EROFS_DEVT_SLOT_SIZE;
} }
err_out:
up_read(&sbi->devs->rwsem); up_read(&sbi->devs->rwsem);
if (page) { erofs_put_metabuf(&buf);
kunmap(page);
unlock_page(page);
put_page(page);
}
return err; return err;
} }
...@@ -411,6 +361,9 @@ static int erofs_read_superblock(struct super_block *sb) ...@@ -411,6 +361,9 @@ static int erofs_read_superblock(struct super_block *sb)
/* handle multiple devices */ /* handle multiple devices */
ret = erofs_init_devices(sb, dsb); ret = erofs_init_devices(sb, dsb);
if (erofs_sb_has_ztailpacking(sbi))
erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
out: out:
kunmap(page); kunmap(page);
put_page(page); put_page(page);
...@@ -423,7 +376,7 @@ static void erofs_default_options(struct erofs_fs_context *ctx) ...@@ -423,7 +376,7 @@ static void erofs_default_options(struct erofs_fs_context *ctx)
#ifdef CONFIG_EROFS_FS_ZIP #ifdef CONFIG_EROFS_FS_ZIP
ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
ctx->opt.max_sync_decompress_pages = 3; ctx->opt.max_sync_decompress_pages = 3;
ctx->opt.readahead_sync_decompress = false; ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
#endif #endif
#ifdef CONFIG_EROFS_FS_XATTR #ifdef CONFIG_EROFS_FS_XATTR
set_opt(&ctx->opt, XATTR_USER); set_opt(&ctx->opt, XATTR_USER);
...@@ -695,6 +648,10 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) ...@@ -695,6 +648,10 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
if (err) if (err)
return err; return err;
err = erofs_register_sysfs(sb);
if (err)
return err;
erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi)); erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi));
return 0; return 0;
} }
...@@ -808,6 +765,7 @@ static void erofs_put_super(struct super_block *sb) ...@@ -808,6 +765,7 @@ static void erofs_put_super(struct super_block *sb)
DBG_BUGON(!sbi); DBG_BUGON(!sbi);
erofs_unregister_sysfs(sb);
erofs_shrinker_unregister(sb); erofs_shrinker_unregister(sb);
#ifdef CONFIG_EROFS_FS_ZIP #ifdef CONFIG_EROFS_FS_ZIP
iput(sbi->managed_cache); iput(sbi->managed_cache);
...@@ -852,6 +810,10 @@ static int __init erofs_module_init(void) ...@@ -852,6 +810,10 @@ static int __init erofs_module_init(void)
if (err) if (err)
goto zip_err; goto zip_err;
err = erofs_init_sysfs();
if (err)
goto sysfs_err;
err = register_filesystem(&erofs_fs_type); err = register_filesystem(&erofs_fs_type);
if (err) if (err)
goto fs_err; goto fs_err;
...@@ -859,6 +821,8 @@ static int __init erofs_module_init(void) ...@@ -859,6 +821,8 @@ static int __init erofs_module_init(void)
return 0; return 0;
fs_err: fs_err:
erofs_exit_sysfs();
sysfs_err:
z_erofs_exit_zip_subsystem(); z_erofs_exit_zip_subsystem();
zip_err: zip_err:
z_erofs_lzma_exit(); z_erofs_lzma_exit();
...@@ -877,6 +841,7 @@ static void __exit erofs_module_exit(void) ...@@ -877,6 +841,7 @@ static void __exit erofs_module_exit(void)
/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */ /* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
rcu_barrier(); rcu_barrier();
erofs_exit_sysfs();
z_erofs_exit_zip_subsystem(); z_erofs_exit_zip_subsystem();
z_erofs_lzma_exit(); z_erofs_lzma_exit();
erofs_exit_shrinker(); erofs_exit_shrinker();
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
* https://www.oppo.com/
*/
#include <linux/sysfs.h>
#include <linux/kobject.h>
#include "internal.h"
enum {
attr_feature,
attr_pointer_ui,
attr_pointer_bool,
};
/*
 * Base object that erofs_attr.offset is relative to; resolved by
 * __struct_ptr(). The names are built by EROFS_ATTR_OFFSET() as
 * struct_##_struct.
 */
enum {
/* offset is relative to the struct erofs_sb_info itself */
struct_erofs_sb_info,
/* offset is relative to &sbi->opt */
struct_erofs_mount_opts,
};
/*
 * One erofs sysfs attribute: the generic sysfs attribute plus the
 * information show()/store() need to locate and interpret the backing field.
 */
struct erofs_attr {
/* embedded sysfs attribute; handlers recover the wrapper via container_of() */
struct attribute attr;
/* one of attr_feature / attr_pointer_ui / attr_pointer_bool */
short attr_id;
/* struct_* selector and byte offset consumed by __struct_ptr() */
int struct_type, offset;
};
/*
 * Define a static struct erofs_attr named erofs_attr_<_name> with the given
 * mode and attr_id attr_<_id>. struct_type and offset are not named here, so
 * they are zero-initialized.
 */
#define EROFS_ATTR(_name, _mode, _id) \
static struct erofs_attr erofs_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.attr_id = attr_##_id, \
}
/* Attribute whose attr_id shares the attribute's own name. */
#define EROFS_ATTR_FUNC(_name, _mode) EROFS_ATTR(_name, _mode, _name)
/* Read-only (0444) attribute of kind attr_feature. */
#define EROFS_ATTR_FEATURE(_name) EROFS_ATTR(_name, 0444, feature)
/*
 * Like EROFS_ATTR(), but also records where the backing field lives:
 * member _name of struct _struct, tagged with struct_<_struct>.
 */
#define EROFS_ATTR_OFFSET(_name, _mode, _id, _struct) \
static struct erofs_attr erofs_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.attr_id = attr_##_id, \
.struct_type = struct_##_struct, \
.offset = offsetof(struct _struct, _name),\
}
/* Field-backed attribute with mode 0644. */
#define EROFS_ATTR_RW(_name, _id, _struct) \
EROFS_ATTR_OFFSET(_name, 0644, _id, _struct)
/* Field-backed attribute with mode 0444. */
#define EROFS_RO_ATTR(_name, _id, _struct) \
EROFS_ATTR_OFFSET(_name, 0444, _id, _struct)
/* 0644 attribute backed by an unsigned int field. */
#define EROFS_ATTR_RW_UI(_name, _struct) \
EROFS_ATTR_RW(_name, pointer_ui, _struct)
/* 0644 attribute backed by a bool field. */
#define EROFS_ATTR_RW_BOOL(_name, _struct) \
EROFS_ATTR_RW(_name, pointer_bool, _struct)
/* Address of the struct attribute embedded in erofs_attr_<name>. */
#define ATTR_LIST(name) (&erofs_attr_##name.attr)
/*
 * Attributes installed as default groups of erofs_sb_ktype.
 * sync_decompress is a 0644 unsigned int backed by
 * erofs_mount_opts.sync_decompress and only exists with CONFIG_EROFS_FS_ZIP.
 */
#ifdef CONFIG_EROFS_FS_ZIP
EROFS_ATTR_RW_UI(sync_decompress, erofs_mount_opts);
#endif
static struct attribute *erofs_attrs[] = {
#ifdef CONFIG_EROFS_FS_ZIP
ATTR_LIST(sync_decompress),
#endif
NULL,
};
/* provides erofs_groups, referenced by erofs_sb_ktype */
ATTRIBUTE_GROUPS(erofs);
/*
 * Features this copy of erofs supports.
 * Each entry is a read-only attr_feature attribute; erofs_attr_show() prints
 * "supported" for all of them.
 */
EROFS_ATTR_FEATURE(zero_padding);
EROFS_ATTR_FEATURE(compr_cfgs);
EROFS_ATTR_FEATURE(big_pcluster);
EROFS_ATTR_FEATURE(chunked_file);
EROFS_ATTR_FEATURE(device_table);
EROFS_ATTR_FEATURE(compr_head2);
EROFS_ATTR_FEATURE(sb_chksum);
EROFS_ATTR_FEATURE(ztailpacking);
/* NULL-terminated list installed through erofs_feat_ktype */
static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(zero_padding),
ATTR_LIST(compr_cfgs),
ATTR_LIST(big_pcluster),
ATTR_LIST(chunked_file),
ATTR_LIST(device_table),
ATTR_LIST(compr_head2),
ATTR_LIST(sb_chksum),
ATTR_LIST(ztailpacking),
NULL,
};
/* provides erofs_feat_groups, referenced by erofs_feat_ktype */
ATTRIBUTE_GROUPS(erofs_feat);
/*
 * Translate an attribute's (struct_type, offset) pair into the address of the
 * backing field.
 *
 * @sbi:         superblock info the attribute belongs to
 * @struct_type: struct_erofs_sb_info or struct_erofs_mount_opts
 * @offset:      byte offset of the field inside the selected structure
 *
 * Returns the field address, or NULL when @struct_type is not recognised.
 * Nothing is dereferenced here; only an address is computed.
 */
static unsigned char *__struct_ptr(struct erofs_sb_info *sbi,
				   int struct_type, int offset)
{
	unsigned char *base;

	switch (struct_type) {
	case struct_erofs_sb_info:
		base = (unsigned char *)sbi;
		break;
	case struct_erofs_mount_opts:
		base = (unsigned char *)&sbi->opt;
		break;
	default:
		return NULL;
	}
	return base + offset;
}
/*
 * sysfs ->show() handler shared by every erofs kobject type.
 *
 * Feature attributes always print "supported"; unsigned int and bool
 * attributes print the current value of the field located by __struct_ptr().
 * Returns the number of bytes emitted, or 0 when the field cannot be located
 * or the attribute kind is unknown.
 *
 * NOTE: these ops are also installed on the standalone "features" kobject,
 * which is not embedded in a struct erofs_sb_info. For that kobject the
 * container_of() result is not a real sbi and must never be dereferenced;
 * the attr_feature path below returns before touching it.
 */
static ssize_t erofs_attr_show(struct kobject *kobj,
			       struct attribute *attr, char *buf)
{
	struct erofs_attr *ea = container_of(attr, struct erofs_attr, attr);
	struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info,
						 s_kobj);
	unsigned char *field = __struct_ptr(sbi, ea->struct_type, ea->offset);

	if (ea->attr_id == attr_feature)
		return sysfs_emit(buf, "supported\n");

	/* everything else needs a resolvable backing field */
	if (!field)
		return 0;

	if (ea->attr_id == attr_pointer_ui)
		return sysfs_emit(buf, "%u\n", *(unsigned int *)field);

	if (ea->attr_id == attr_pointer_bool)
		return sysfs_emit(buf, "%d\n", *(bool *)field);

	return 0;
}
/*
 * sysfs ->store() handler shared by every erofs kobject type.
 *
 * Only attr_pointer_ui and attr_pointer_bool attributes are writable. The
 * input (leading whitespace skipped) is parsed with kstrtoul() using base
 * auto-detection and written to the field located by __struct_ptr().
 *
 * Returns @len on success, the kstrtoul() error on a parse failure, -ERANGE
 * when an unsigned int value does not fit, -EINVAL for an out-of-range
 * sync_decompress or a bool other than 0/1, and 0 when the attribute kind is
 * not writable or its field cannot be located.
 */
static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr,
				const char *buf, size_t len)
{
	struct erofs_attr *ea = container_of(attr, struct erofs_attr, attr);
	struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info,
						 s_kobj);
	unsigned char *field = __struct_ptr(sbi, ea->struct_type, ea->offset);
	const bool is_ui = ea->attr_id == attr_pointer_ui;
	const bool is_bool = ea->attr_id == attr_pointer_bool;
	unsigned long val;
	int err;

	if (!is_ui && !is_bool)
		return 0;
	if (!field)
		return 0;

	err = kstrtoul(skip_spaces(buf), 0, &val);
	if (err)
		return err;

	if (is_bool) {
		/* only 0 and 1 are accepted */
		if (val > 1)
			return -EINVAL;
		*(bool *)field = !!val;
		return len;
	}

	/* unsigned int attribute: reject values that would be truncated */
	if ((unsigned int)val != val)
		return -ERANGE;
#ifdef CONFIG_EROFS_FS_ZIP
	/* sync_decompress only takes the EROFS_SYNC_DECOMPRESS_* values */
	if (strcmp(ea->attr.name, "sync_decompress") == 0 &&
	    val > EROFS_SYNC_DECOMPRESS_FORCE_OFF)
		return -EINVAL;
#endif
	*(unsigned int *)field = val;
	return len;
}
/*
 * ->release() of the per-superblock kobject: signal s_kobj_unregister so that
 * whoever dropped the last reference and is waiting on it can proceed.
 */
static void erofs_sb_release(struct kobject *kobj)
{
	struct erofs_sb_info *owner;

	owner = container_of(kobj, struct erofs_sb_info, s_kobj);
	complete(&owner->s_kobj_unregister);
}
/* show/store handlers shared by erofs_sb_ktype, erofs_ktype and erofs_feat_ktype */
static const struct sysfs_ops erofs_attr_ops = {
.show = erofs_attr_show,
.store = erofs_attr_store,
};
/*
 * Type of the per-superblock kobject (sbi->s_kobj): exposes erofs_attrs and
 * completes s_kobj_unregister on release.
 */
static struct kobj_type erofs_sb_ktype = {
.default_groups = erofs_groups,
.sysfs_ops = &erofs_attr_ops,
.release = erofs_sb_release,
};
/* Type of the root kset's own kobject; it has no default groups and no release hook */
static struct kobj_type erofs_ktype = {
.sysfs_ops = &erofs_attr_ops,
};
/*
 * Root kset; erofs_init_sysfs() names it "erofs" and parents it to fs_kobj.
 * Per-superblock kobjects and the "features" kobject are members of it.
 */
static struct kset erofs_root = {
.kobj = {.ktype = &erofs_ktype},
};
/* Type of the "features" kobject: exposes erofs_feat_attrs; no release hook is set */
static struct kobj_type erofs_feat_ktype = {
.default_groups = erofs_feat_groups,
.sysfs_ops = &erofs_attr_ops,
};
/* Static kobject registered as "features" in the erofs_root kset by erofs_init_sysfs() */
static struct kobject erofs_feat = {
.kset = &erofs_root,
};
int erofs_register_sysfs(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
int err;
sbi->s_kobj.kset = &erofs_root;
init_completion(&sbi->s_kobj_unregister);
err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL,
"%s", sb->s_id);
if (err)
goto put_sb_kobj;
return 0;
put_sb_kobj:
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
return err;
}
/*
 * Remove the per-superblock kobject from sysfs and wait until its release
 * callback has signalled s_kobj_unregister.
 *
 * erofs_register_sysfs() already drops the kobject and consumes the
 * completion when registration fails. If superblock teardown still reaches
 * this function afterwards, deleting and putting the kobject a second time
 * would underflow its refcount and the wait below could block forever.
 * Only tear down a kobject that is actually present in sysfs.
 */
void erofs_unregister_sysfs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	if (!sbi->s_kobj.state_in_sysfs)
		return;

	kobject_del(&sbi->s_kobj);
	kobject_put(&sbi->s_kobj);
	wait_for_completion(&sbi->s_kobj_unregister);
}
/*
 * Module-init hook: register the "erofs" kset under fs_kobj and add the
 * "features" kobject to it.
 *
 * Returns 0 on success or the error of the step that failed. If adding the
 * "features" kobject fails, its reference is dropped and the kset is
 * unregistered again.
 */
int __init erofs_init_sysfs(void)
{
	int err;

	kobject_set_name(&erofs_root.kobj, "erofs");
	erofs_root.kobj.parent = fs_kobj;

	err = kset_register(&erofs_root);
	if (err)
		return err;

	err = kobject_init_and_add(&erofs_feat, &erofs_feat_ktype,
				   NULL, "features");
	if (!err)
		return 0;

	/* undo in reverse order of setup */
	kobject_put(&erofs_feat);
	kset_unregister(&erofs_root);
	return err;
}
/*
 * Undo erofs_init_sysfs(): drop the "features" kobject first, then
 * unregister the kset it belongs to.
 */
void erofs_exit_sysfs(void)
{
	kobject_put(&erofs_feat);
	kset_unregister(&erofs_root);
}
...@@ -2,39 +2,20 @@ ...@@ -2,39 +2,20 @@
/* /*
* Copyright (C) 2017-2018 HUAWEI, Inc. * Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/ * https://www.huawei.com/
* Copyright (C) 2021-2022, Alibaba Cloud
*/ */
#include <linux/security.h> #include <linux/security.h>
#include "xattr.h" #include "xattr.h"
struct xattr_iter { struct xattr_iter {
struct super_block *sb; struct super_block *sb;
struct page *page; struct erofs_buf buf;
void *kaddr; void *kaddr;
erofs_blk_t blkaddr; erofs_blk_t blkaddr;
unsigned int ofs; unsigned int ofs;
}; };
static inline void xattr_iter_end(struct xattr_iter *it, bool atomic)
{
/* the only user of kunmap() is 'init_inode_xattrs' */
if (!atomic)
kunmap(it->page);
else
kunmap_atomic(it->kaddr);
unlock_page(it->page);
put_page(it->page);
}
static inline void xattr_iter_end_final(struct xattr_iter *it)
{
if (!it->page)
return;
xattr_iter_end(it, true);
}
static int init_inode_xattrs(struct inode *inode) static int init_inode_xattrs(struct inode *inode)
{ {
struct erofs_inode *const vi = EROFS_I(inode); struct erofs_inode *const vi = EROFS_I(inode);
...@@ -43,7 +24,6 @@ static int init_inode_xattrs(struct inode *inode) ...@@ -43,7 +24,6 @@ static int init_inode_xattrs(struct inode *inode)
struct erofs_xattr_ibody_header *ih; struct erofs_xattr_ibody_header *ih;
struct super_block *sb; struct super_block *sb;
struct erofs_sb_info *sbi; struct erofs_sb_info *sbi;
bool atomic_map;
int ret = 0; int ret = 0;
/* the most case is that xattrs of this inode are initialized. */ /* the most case is that xattrs of this inode are initialized. */
...@@ -91,26 +71,23 @@ static int init_inode_xattrs(struct inode *inode) ...@@ -91,26 +71,23 @@ static int init_inode_xattrs(struct inode *inode)
sb = inode->i_sb; sb = inode->i_sb;
sbi = EROFS_SB(sb); sbi = EROFS_SB(sb);
it.buf = __EROFS_BUF_INITIALIZER;
it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize); it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize);
it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize); it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize);
it.page = erofs_get_meta_page(sb, it.blkaddr); /* read in shared xattr array (non-atomic, see kmalloc below) */
if (IS_ERR(it.page)) { it.kaddr = erofs_read_metabuf(&it.buf, sb, it.blkaddr, EROFS_KMAP);
ret = PTR_ERR(it.page); if (IS_ERR(it.kaddr)) {
ret = PTR_ERR(it.kaddr);
goto out_unlock; goto out_unlock;
} }
/* read in shared xattr array (non-atomic, see kmalloc below) */
it.kaddr = kmap(it.page);
atomic_map = false;
ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs); ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs);
vi->xattr_shared_count = ih->h_shared_count; vi->xattr_shared_count = ih->h_shared_count;
vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count, vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count,
sizeof(uint), GFP_KERNEL); sizeof(uint), GFP_KERNEL);
if (!vi->xattr_shared_xattrs) { if (!vi->xattr_shared_xattrs) {
xattr_iter_end(&it, atomic_map); erofs_put_metabuf(&it.buf);
ret = -ENOMEM; ret = -ENOMEM;
goto out_unlock; goto out_unlock;
} }
...@@ -122,25 +99,22 @@ static int init_inode_xattrs(struct inode *inode) ...@@ -122,25 +99,22 @@ static int init_inode_xattrs(struct inode *inode)
if (it.ofs >= EROFS_BLKSIZ) { if (it.ofs >= EROFS_BLKSIZ) {
/* cannot be unaligned */ /* cannot be unaligned */
DBG_BUGON(it.ofs != EROFS_BLKSIZ); DBG_BUGON(it.ofs != EROFS_BLKSIZ);
xattr_iter_end(&it, atomic_map);
it.page = erofs_get_meta_page(sb, ++it.blkaddr); it.kaddr = erofs_read_metabuf(&it.buf, sb, ++it.blkaddr,
if (IS_ERR(it.page)) { EROFS_KMAP);
if (IS_ERR(it.kaddr)) {
kfree(vi->xattr_shared_xattrs); kfree(vi->xattr_shared_xattrs);
vi->xattr_shared_xattrs = NULL; vi->xattr_shared_xattrs = NULL;
ret = PTR_ERR(it.page); ret = PTR_ERR(it.kaddr);
goto out_unlock; goto out_unlock;
} }
it.kaddr = kmap_atomic(it.page);
atomic_map = true;
it.ofs = 0; it.ofs = 0;
} }
vi->xattr_shared_xattrs[i] = vi->xattr_shared_xattrs[i] =
le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs)); le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs));
it.ofs += sizeof(__le32); it.ofs += sizeof(__le32);
} }
xattr_iter_end(&it, atomic_map); erofs_put_metabuf(&it.buf);
/* paired with smp_mb() at the beginning of the function. */ /* paired with smp_mb() at the beginning of the function. */
smp_mb(); smp_mb();
...@@ -172,19 +146,11 @@ static inline int xattr_iter_fixup(struct xattr_iter *it) ...@@ -172,19 +146,11 @@ static inline int xattr_iter_fixup(struct xattr_iter *it)
if (it->ofs < EROFS_BLKSIZ) if (it->ofs < EROFS_BLKSIZ)
return 0; return 0;
xattr_iter_end(it, true);
it->blkaddr += erofs_blknr(it->ofs); it->blkaddr += erofs_blknr(it->ofs);
it->kaddr = erofs_read_metabuf(&it->buf, it->sb, it->blkaddr,
it->page = erofs_get_meta_page(it->sb, it->blkaddr); EROFS_KMAP_ATOMIC);
if (IS_ERR(it->page)) { if (IS_ERR(it->kaddr))
int err = PTR_ERR(it->page); return PTR_ERR(it->kaddr);
it->page = NULL;
return err;
}
it->kaddr = kmap_atomic(it->page);
it->ofs = erofs_blkoff(it->ofs); it->ofs = erofs_blkoff(it->ofs);
return 0; return 0;
} }
...@@ -207,11 +173,10 @@ static int inline_xattr_iter_begin(struct xattr_iter *it, ...@@ -207,11 +173,10 @@ static int inline_xattr_iter_begin(struct xattr_iter *it,
it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs); it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs);
it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs); it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs);
it->page = erofs_get_meta_page(inode->i_sb, it->blkaddr); it->kaddr = erofs_read_metabuf(&it->buf, inode->i_sb, it->blkaddr,
if (IS_ERR(it->page)) EROFS_KMAP_ATOMIC);
return PTR_ERR(it->page); if (IS_ERR(it->kaddr))
return PTR_ERR(it->kaddr);
it->kaddr = kmap_atomic(it->page);
return vi->xattr_isize - xattr_header_sz; return vi->xattr_isize - xattr_header_sz;
} }
...@@ -272,7 +237,7 @@ static int xattr_foreach(struct xattr_iter *it, ...@@ -272,7 +237,7 @@ static int xattr_foreach(struct xattr_iter *it,
it->ofs = 0; it->ofs = 0;
} }
slice = min_t(unsigned int, PAGE_SIZE - it->ofs, slice = min_t(unsigned int, EROFS_BLKSIZ - it->ofs,
entry.e_name_len - processed); entry.e_name_len - processed);
/* handle name */ /* handle name */
...@@ -307,7 +272,7 @@ static int xattr_foreach(struct xattr_iter *it, ...@@ -307,7 +272,7 @@ static int xattr_foreach(struct xattr_iter *it,
it->ofs = 0; it->ofs = 0;
} }
slice = min_t(unsigned int, PAGE_SIZE - it->ofs, slice = min_t(unsigned int, EROFS_BLKSIZ - it->ofs,
value_sz - processed); value_sz - processed);
op->value(it, processed, it->kaddr + it->ofs, slice); op->value(it, processed, it->kaddr + it->ofs, slice);
it->ofs += slice; it->ofs += slice;
...@@ -386,8 +351,6 @@ static int inline_getxattr(struct inode *inode, struct getxattr_iter *it) ...@@ -386,8 +351,6 @@ static int inline_getxattr(struct inode *inode, struct getxattr_iter *it)
if (ret != -ENOATTR) if (ret != -ENOATTR)
break; break;
} }
xattr_iter_end_final(&it->it);
return ret ? ret : it->buffer_size; return ret ? ret : it->buffer_size;
} }
...@@ -404,26 +367,16 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) ...@@ -404,26 +367,16 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]);
it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr,
if (!i || blkaddr != it->it.blkaddr) { EROFS_KMAP_ATOMIC);
if (i) if (IS_ERR(it->it.kaddr))
xattr_iter_end(&it->it, true); return PTR_ERR(it->it.kaddr);
it->it.blkaddr = blkaddr;
it->it.page = erofs_get_meta_page(sb, blkaddr);
if (IS_ERR(it->it.page))
return PTR_ERR(it->it.page);
it->it.kaddr = kmap_atomic(it->it.page);
it->it.blkaddr = blkaddr;
}
ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL); ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL);
if (ret != -ENOATTR) if (ret != -ENOATTR)
break; break;
} }
if (vi->xattr_shared_count)
xattr_iter_end_final(&it->it);
return ret ? ret : it->buffer_size; return ret ? ret : it->buffer_size;
} }
...@@ -452,10 +405,11 @@ int erofs_getxattr(struct inode *inode, int index, ...@@ -452,10 +405,11 @@ int erofs_getxattr(struct inode *inode, int index,
return ret; return ret;
it.index = index; it.index = index;
it.name.len = strlen(name); it.name.len = strlen(name);
if (it.name.len > EROFS_NAME_LEN) if (it.name.len > EROFS_NAME_LEN)
return -ERANGE; return -ERANGE;
it.it.buf = __EROFS_BUF_INITIALIZER;
it.name.name = name; it.name.name = name;
it.buffer = buffer; it.buffer = buffer;
...@@ -465,6 +419,7 @@ int erofs_getxattr(struct inode *inode, int index, ...@@ -465,6 +419,7 @@ int erofs_getxattr(struct inode *inode, int index,
ret = inline_getxattr(inode, &it); ret = inline_getxattr(inode, &it);
if (ret == -ENOATTR) if (ret == -ENOATTR)
ret = shared_getxattr(inode, &it); ret = shared_getxattr(inode, &it);
erofs_put_metabuf(&it.it.buf);
return ret; return ret;
} }
...@@ -607,7 +562,6 @@ static int inline_listxattr(struct listxattr_iter *it) ...@@ -607,7 +562,6 @@ static int inline_listxattr(struct listxattr_iter *it)
if (ret) if (ret)
break; break;
} }
xattr_iter_end_final(&it->it);
return ret ? ret : it->buffer_ofs; return ret ? ret : it->buffer_ofs;
} }
...@@ -625,25 +579,16 @@ static int shared_listxattr(struct listxattr_iter *it) ...@@ -625,25 +579,16 @@ static int shared_listxattr(struct listxattr_iter *it)
xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]);
it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
if (!i || blkaddr != it->it.blkaddr) { it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr,
if (i) EROFS_KMAP_ATOMIC);
xattr_iter_end(&it->it, true); if (IS_ERR(it->it.kaddr))
return PTR_ERR(it->it.kaddr);
it->it.page = erofs_get_meta_page(sb, blkaddr); it->it.blkaddr = blkaddr;
if (IS_ERR(it->it.page))
return PTR_ERR(it->it.page);
it->it.kaddr = kmap_atomic(it->it.page);
it->it.blkaddr = blkaddr;
}
ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL); ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL);
if (ret) if (ret)
break; break;
} }
if (vi->xattr_shared_count)
xattr_iter_end_final(&it->it);
return ret ? ret : it->buffer_ofs; return ret ? ret : it->buffer_ofs;
} }
...@@ -659,6 +604,7 @@ ssize_t erofs_listxattr(struct dentry *dentry, ...@@ -659,6 +604,7 @@ ssize_t erofs_listxattr(struct dentry *dentry,
if (ret) if (ret)
return ret; return ret;
it.it.buf = __EROFS_BUF_INITIALIZER;
it.dentry = dentry; it.dentry = dentry;
it.buffer = buffer; it.buffer = buffer;
it.buffer_size = buffer_size; it.buffer_size = buffer_size;
...@@ -667,9 +613,10 @@ ssize_t erofs_listxattr(struct dentry *dentry, ...@@ -667,9 +613,10 @@ ssize_t erofs_listxattr(struct dentry *dentry,
it.it.sb = dentry->d_sb; it.it.sb = dentry->d_sb;
ret = inline_listxattr(&it); ret = inline_listxattr(&it);
if (ret < 0 && ret != -ENOATTR) if (ret >= 0 || ret == -ENOATTR)
return ret; ret = shared_listxattr(&it);
return shared_listxattr(&it); erofs_put_metabuf(&it.it.buf);
return ret;
} }
#ifdef CONFIG_EROFS_FS_POSIX_ACL #ifdef CONFIG_EROFS_FS_POSIX_ACL
......
...@@ -86,4 +86,3 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu); ...@@ -86,4 +86,3 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu);
#endif #endif
#endif #endif
This diff is collapsed.
...@@ -62,8 +62,16 @@ struct z_erofs_pcluster { ...@@ -62,8 +62,16 @@ struct z_erofs_pcluster {
/* A: lower limit of decompressed length and if full length or not */ /* A: lower limit of decompressed length and if full length or not */
unsigned int length; unsigned int length;
/* I: physical cluster size in pages */ /* I: page offset of inline compressed data */
unsigned short pclusterpages; unsigned short pageofs_in;
union {
/* I: physical cluster size in pages */
unsigned short pclusterpages;
/* I: tailpacking inline compressed size */
unsigned short tailpacking_size;
};
/* I: compression algorithm format */ /* I: compression algorithm format */
unsigned char algorithmformat; unsigned char algorithmformat;
...@@ -94,6 +102,18 @@ struct z_erofs_decompressqueue { ...@@ -94,6 +102,18 @@ struct z_erofs_decompressqueue {
} u; } u;
}; };
static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
{
return !pcl->obj.index;
}
static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
{
if (z_erofs_is_inline_pcluster(pcl))
return 1;
return pcl->pclusterpages;
}
#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2 #define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1) #define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS) #define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
......
...@@ -7,12 +7,17 @@ ...@@ -7,12 +7,17 @@
#include <asm/unaligned.h> #include <asm/unaligned.h>
#include <trace/events/erofs.h> #include <trace/events/erofs.h>
static int z_erofs_do_map_blocks(struct inode *inode,
struct erofs_map_blocks *map,
int flags);
int z_erofs_fill_inode(struct inode *inode) int z_erofs_fill_inode(struct inode *inode)
{ {
struct erofs_inode *const vi = EROFS_I(inode); struct erofs_inode *const vi = EROFS_I(inode);
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
if (!erofs_sb_has_big_pcluster(sbi) && if (!erofs_sb_has_big_pcluster(sbi) &&
!erofs_sb_has_ztailpacking(sbi) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
vi->z_advise = 0; vi->z_advise = 0;
vi->z_algorithmtype[0] = 0; vi->z_algorithmtype[0] = 0;
...@@ -30,7 +35,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) ...@@ -30,7 +35,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
struct super_block *const sb = inode->i_sb; struct super_block *const sb = inode->i_sb;
int err, headnr; int err, headnr;
erofs_off_t pos; erofs_off_t pos;
struct page *page; struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
void *kaddr; void *kaddr;
struct z_erofs_map_header *h; struct z_erofs_map_header *h;
...@@ -51,18 +56,18 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) ...@@ -51,18 +56,18 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
goto out_unlock; goto out_unlock;
DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
!erofs_sb_has_ztailpacking(EROFS_SB(sb)) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY); vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
vi->xattr_isize, 8); vi->xattr_isize, 8);
page = erofs_get_meta_page(sb, erofs_blknr(pos)); kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos),
if (IS_ERR(page)) { EROFS_KMAP_ATOMIC);
err = PTR_ERR(page); if (IS_ERR(kaddr)) {
err = PTR_ERR(kaddr);
goto out_unlock; goto out_unlock;
} }
kaddr = kmap_atomic(page);
h = kaddr + erofs_blkoff(pos); h = kaddr + erofs_blkoff(pos);
vi->z_advise = le16_to_cpu(h->h_advise); vi->z_advise = le16_to_cpu(h->h_advise);
vi->z_algorithmtype[0] = h->h_algorithmtype & 15; vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
...@@ -94,13 +99,33 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) ...@@ -94,13 +99,33 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
err = -EFSCORRUPTED; err = -EFSCORRUPTED;
goto unmap_done; goto unmap_done;
} }
unmap_done:
erofs_put_metabuf(&buf);
if (err)
goto out_unlock;
if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
struct erofs_map_blocks map = {
.buf = __EROFS_BUF_INITIALIZER
};
vi->z_idata_size = le16_to_cpu(h->h_idata_size);
err = z_erofs_do_map_blocks(inode, &map,
EROFS_GET_BLOCKS_FINDTAIL);
erofs_put_metabuf(&map.buf);
if (!map.m_plen ||
erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) {
erofs_err(sb, "invalid tail-packing pclustersize %llu",
map.m_plen);
err = -EFSCORRUPTED;
}
if (err < 0)
goto out_unlock;
}
/* paired with smp_mb() at the beginning of the function */ /* paired with smp_mb() at the beginning of the function */
smp_mb(); smp_mb();
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
unmap_done:
kunmap_atomic(kaddr);
unlock_page(page);
put_page(page);
out_unlock: out_unlock:
clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
return err; return err;
...@@ -117,37 +142,18 @@ struct z_erofs_maprecorder { ...@@ -117,37 +142,18 @@ struct z_erofs_maprecorder {
u16 clusterofs; u16 clusterofs;
u16 delta[2]; u16 delta[2];
erofs_blk_t pblk, compressedlcs; erofs_blk_t pblk, compressedlcs;
erofs_off_t nextpackoff;
}; };
static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
erofs_blk_t eblk) erofs_blk_t eblk)
{ {
struct super_block *const sb = m->inode->i_sb; struct super_block *const sb = m->inode->i_sb;
struct erofs_map_blocks *const map = m->map;
struct page *mpage = map->mpage;
if (mpage) {
if (mpage->index == eblk) {
if (!m->kaddr)
m->kaddr = kmap_atomic(mpage);
return 0;
}
if (m->kaddr) {
kunmap_atomic(m->kaddr);
m->kaddr = NULL;
}
put_page(mpage);
}
mpage = erofs_get_meta_page(sb, eblk); m->kaddr = erofs_read_metabuf(&m->map->buf, sb, eblk,
if (IS_ERR(mpage)) { EROFS_KMAP_ATOMIC);
map->mpage = NULL; if (IS_ERR(m->kaddr))
return PTR_ERR(mpage); return PTR_ERR(m->kaddr);
}
m->kaddr = kmap_atomic(mpage);
unlock_page(mpage);
map->mpage = mpage;
return 0; return 0;
} }
...@@ -169,6 +175,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, ...@@ -169,6 +175,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
if (err) if (err)
return err; return err;
m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index);
m->lcn = lcn; m->lcn = lcn;
di = m->kaddr + erofs_blkoff(pos); di = m->kaddr + erofs_blkoff(pos);
...@@ -243,12 +250,12 @@ static int get_compacted_la_distance(unsigned int lclusterbits, ...@@ -243,12 +250,12 @@ static int get_compacted_la_distance(unsigned int lclusterbits,
static int unpack_compacted_index(struct z_erofs_maprecorder *m, static int unpack_compacted_index(struct z_erofs_maprecorder *m,
unsigned int amortizedshift, unsigned int amortizedshift,
unsigned int eofs, bool lookahead) erofs_off_t pos, bool lookahead)
{ {
struct erofs_inode *const vi = EROFS_I(m->inode); struct erofs_inode *const vi = EROFS_I(m->inode);
const unsigned int lclusterbits = vi->z_logical_clusterbits; const unsigned int lclusterbits = vi->z_logical_clusterbits;
const unsigned int lomask = (1 << lclusterbits) - 1; const unsigned int lomask = (1 << lclusterbits) - 1;
unsigned int vcnt, base, lo, encodebits, nblk; unsigned int vcnt, base, lo, encodebits, nblk, eofs;
int i; int i;
u8 *in, type; u8 *in, type;
bool big_pcluster; bool big_pcluster;
...@@ -260,8 +267,12 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, ...@@ -260,8 +267,12 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
else else
return -EOPNOTSUPP; return -EOPNOTSUPP;
/* it doesn't equal to round_up(..) */
m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
(vcnt << amortizedshift);
big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
eofs = erofs_blkoff(pos);
base = round_down(eofs, vcnt << amortizedshift); base = round_down(eofs, vcnt << amortizedshift);
in = m->kaddr + base; in = m->kaddr + base;
...@@ -399,8 +410,7 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, ...@@ -399,8 +410,7 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
err = z_erofs_reload_indexes(m, erofs_blknr(pos)); err = z_erofs_reload_indexes(m, erofs_blknr(pos));
if (err) if (err)
return err; return err;
return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos), return unpack_compacted_index(m, amortizedshift, pos, lookahead);
lookahead);
} }
static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m, static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
...@@ -583,11 +593,12 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) ...@@ -583,11 +593,12 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
return 0; return 0;
} }
int z_erofs_map_blocks_iter(struct inode *inode, static int z_erofs_do_map_blocks(struct inode *inode,
struct erofs_map_blocks *map, struct erofs_map_blocks *map,
int flags) int flags)
{ {
struct erofs_inode *const vi = EROFS_I(inode); struct erofs_inode *const vi = EROFS_I(inode);
bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
struct z_erofs_maprecorder m = { struct z_erofs_maprecorder m = {
.inode = inode, .inode = inode,
.map = map, .map = map,
...@@ -597,22 +608,8 @@ int z_erofs_map_blocks_iter(struct inode *inode, ...@@ -597,22 +608,8 @@ int z_erofs_map_blocks_iter(struct inode *inode,
unsigned long initial_lcn; unsigned long initial_lcn;
unsigned long long ofs, end; unsigned long long ofs, end;
trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
/* when trying to read beyond EOF, leave it unmapped */
if (map->m_la >= inode->i_size) {
map->m_llen = map->m_la + 1 - inode->i_size;
map->m_la = inode->i_size;
map->m_flags = 0;
goto out;
}
err = z_erofs_fill_inode_lazy(inode);
if (err)
goto out;
lclusterbits = vi->z_logical_clusterbits; lclusterbits = vi->z_logical_clusterbits;
ofs = map->m_la; ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
initial_lcn = ofs >> lclusterbits; initial_lcn = ofs >> lclusterbits;
endoff = ofs & ((1 << lclusterbits) - 1); endoff = ofs & ((1 << lclusterbits) - 1);
...@@ -620,6 +617,9 @@ int z_erofs_map_blocks_iter(struct inode *inode, ...@@ -620,6 +617,9 @@ int z_erofs_map_blocks_iter(struct inode *inode,
if (err) if (err)
goto unmap_out; goto unmap_out;
if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL))
vi->z_idataoff = m.nextpackoff;
map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED; map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
end = (m.lcn + 1ULL) << lclusterbits; end = (m.lcn + 1ULL) << lclusterbits;
...@@ -659,11 +659,19 @@ int z_erofs_map_blocks_iter(struct inode *inode, ...@@ -659,11 +659,19 @@ int z_erofs_map_blocks_iter(struct inode *inode,
} }
map->m_llen = end - map->m_la; map->m_llen = end - map->m_la;
map->m_pa = blknr_to_addr(m.pblk);
err = z_erofs_get_extent_compressedlen(&m, initial_lcn); if (flags & EROFS_GET_BLOCKS_FINDTAIL)
if (err) vi->z_tailextent_headlcn = m.lcn;
goto out; if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
map->m_flags |= EROFS_MAP_META;
map->m_pa = vi->z_idataoff;
map->m_plen = vi->z_idata_size;
} else {
map->m_pa = blknr_to_addr(m.pblk);
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
if (err)
goto out;
}
if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
...@@ -681,14 +689,38 @@ int z_erofs_map_blocks_iter(struct inode *inode, ...@@ -681,14 +689,38 @@ int z_erofs_map_blocks_iter(struct inode *inode,
map->m_flags |= EROFS_MAP_FULL_MAPPED; map->m_flags |= EROFS_MAP_FULL_MAPPED;
} }
unmap_out: unmap_out:
if (m.kaddr) erofs_unmap_metabuf(&m.map->buf);
kunmap_atomic(m.kaddr);
out: out:
erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o", erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
__func__, map->m_la, map->m_pa, __func__, map->m_la, map->m_pa,
map->m_llen, map->m_plen, map->m_flags); map->m_llen, map->m_plen, map->m_flags);
return err;
}
int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_blocks *map,
int flags)
{
int err = 0;
trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
/* when trying to read beyond EOF, leave it unmapped */
if (map->m_la >= inode->i_size) {
map->m_llen = map->m_la + 1 - inode->i_size;
map->m_la = inode->i_size;
map->m_flags = 0;
goto out;
}
err = z_erofs_fill_inode_lazy(inode);
if (err)
goto out;
err = z_erofs_do_map_blocks(inode, map, flags);
out:
trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err); trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
/* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */ /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
...@@ -704,8 +736,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, ...@@ -704,8 +736,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
struct erofs_map_blocks map = { .m_la = offset }; struct erofs_map_blocks map = { .m_la = offset };
ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP); ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
if (map.mpage) erofs_put_metabuf(&map.buf);
put_page(map.mpage);
if (ret < 0) if (ret < 0)
return ret; return ret;
......
...@@ -169,7 +169,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter, ...@@ -169,7 +169,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
__entry->flags ? show_map_flags(__entry->flags) : "NULL") __entry->flags ? show_map_flags(__entry->flags) : "NULL")
); );
DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter, DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_enter,
TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
unsigned flags), unsigned flags),
...@@ -221,7 +221,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit, ...@@ -221,7 +221,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
show_mflags(__entry->mflags), __entry->ret) show_mflags(__entry->mflags), __entry->ret)
); );
DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit, DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_exit,
TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
unsigned flags, int ret), unsigned flags, int ret),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment