Commit 1bbb19b6 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'erofs-for-6.8-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs fixes from Gao Xiang:

 - fix an infinite loop issue of sub-page compressed data support found
   with lengthy stress tests on a 64k-page arm64 VM

 - optimize the temporary buffer allocation for low-memory scenarios,
   which can reduce 20.21% on average under a heavy multi-app launch
   benchmark workload

 - get rid of unnecessary GFP_NOFS

* tag 'erofs-for-6.8-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: relaxed temporary buffers allocation on readahead
  erofs: fix infinite loop due to a race of filling compressed_bvecs
  erofs: get rid of unneeded GFP_NOFS
parents 2a6526c4 d9281660
......@@ -11,13 +11,12 @@
struct z_erofs_decompress_req {
struct super_block *sb;
struct page **in, **out;
unsigned short pageofs_in, pageofs_out;
unsigned int inputsize, outputsize;
/* indicate the algorithm will be used for decompression */
unsigned int alg;
unsigned int alg; /* the algorithm for decompression */
bool inplace_io, partial_decoding, fillgaps;
gfp_t gfp; /* allocation flags for extra temporary buffers */
};
struct z_erofs_decompressor {
......
......@@ -111,8 +111,9 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
victim = availables[--top];
get_page(victim);
} else {
victim = erofs_allocpage(pagepool,
GFP_KERNEL | __GFP_NOFAIL);
victim = erofs_allocpage(pagepool, rq->gfp);
if (!victim)
return -ENOMEM;
set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
}
rq->out[i] = victim;
......
......@@ -95,7 +95,7 @@ int z_erofs_load_deflate_config(struct super_block *sb,
}
int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool)
struct page **pgpl)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
......@@ -158,8 +158,12 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs);
outsz -= strm->z.avail_out;
if (!rq->out[no]) {
rq->out[no] = erofs_allocpage(pagepool,
GFP_KERNEL | __GFP_NOFAIL);
rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
if (!rq->out[no]) {
kout = NULL;
err = -ENOMEM;
break;
}
set_page_private(rq->out[no],
Z_EROFS_SHORTLIVED_PAGE);
}
......@@ -211,8 +215,11 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
rq->in[j]));
tmppage = erofs_allocpage(pagepool,
GFP_KERNEL | __GFP_NOFAIL);
tmppage = erofs_allocpage(pgpl, rq->gfp);
if (!tmppage) {
err = -ENOMEM;
goto failed;
}
set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
copy_highpage(tmppage, rq->in[j]);
rq->in[j] = tmppage;
......@@ -230,7 +237,7 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
break;
}
}
failed:
if (zlib_inflateEnd(&strm->z) != Z_OK && !err)
err = -EIO;
if (kout)
......
......@@ -148,7 +148,7 @@ int z_erofs_load_lzma_config(struct super_block *sb,
}
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool)
struct page **pgpl)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
......@@ -215,8 +215,11 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
PAGE_SIZE - pageofs);
outlen -= strm->buf.out_size;
if (!rq->out[no] && rq->fillgaps) { /* deduped */
rq->out[no] = erofs_allocpage(pagepool,
GFP_KERNEL | __GFP_NOFAIL);
rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
if (!rq->out[no]) {
err = -ENOMEM;
break;
}
set_page_private(rq->out[no],
Z_EROFS_SHORTLIVED_PAGE);
}
......@@ -258,8 +261,11 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
rq->in[j]));
tmppage = erofs_allocpage(pagepool,
GFP_KERNEL | __GFP_NOFAIL);
tmppage = erofs_allocpage(pgpl, rq->gfp);
if (!tmppage) {
err = -ENOMEM;
goto failed;
}
set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
copy_highpage(tmppage, rq->in[j]);
rq->in[j] = tmppage;
......@@ -277,6 +283,7 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
break;
}
}
failed:
if (no < nrpages_out && strm->buf.out)
kunmap(rq->out[no]);
if (ni < nrpages_in)
......
......@@ -459,7 +459,7 @@ static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
inode->i_blkbits = EROFS_SB(sb)->blkszbits;
inode->i_private = ctx;
......
......@@ -60,7 +60,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
} else {
const unsigned int gotten = sb->s_blocksize - *ofs;
copied = kmalloc(vi->inode_isize, GFP_NOFS);
copied = kmalloc(vi->inode_isize, GFP_KERNEL);
if (!copied) {
err = -ENOMEM;
goto err_out;
......
......@@ -81,7 +81,7 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
repeat:
xa_lock(&sbi->managed_pslots);
pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
NULL, grp, GFP_NOFS);
NULL, grp, GFP_KERNEL);
if (pre) {
if (xa_is_err(pre)) {
pre = ERR_PTR(xa_err(pre));
......
......@@ -82,6 +82,9 @@ struct z_erofs_pcluster {
/* L: indicate several pageofs_outs or not */
bool multibases;
/* L: whether extra buffer allocations are best-effort */
bool besteffort;
/* A: compressed bvecs (can be cached or inplaced pages) */
struct z_erofs_bvec compressed_bvecs[];
};
......@@ -230,7 +233,7 @@ static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter,
struct page *nextpage = *candidate_bvpage;
if (!nextpage) {
nextpage = erofs_allocpage(pagepool, GFP_NOFS);
nextpage = erofs_allocpage(pagepool, GFP_KERNEL);
if (!nextpage)
return -ENOMEM;
set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE);
......@@ -302,7 +305,7 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size)
if (nrpages > pcs->maxpages)
continue;
pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS);
pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL);
if (!pcl)
return ERR_PTR(-ENOMEM);
pcl->pclustersize = size;
......@@ -563,21 +566,19 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
unsigned int i;
if (i_blocksize(fe->inode) != PAGE_SIZE)
return;
if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
if (i_blocksize(fe->inode) != PAGE_SIZE ||
fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
return;
for (i = 0; i < pclusterpages; ++i) {
struct page *page, *newpage;
void *t; /* mark pages just found for debugging */
/* the compressed page was loaded before */
/* Inaccurate check w/o locking to avoid unneeded lookups */
if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue;
page = find_get_page(mc, pcl->obj.index + i);
if (page) {
t = (void *)((unsigned long)page | 1);
newpage = NULL;
......@@ -597,9 +598,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
t = (void *)((unsigned long)newpage | 1);
}
if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, t))
spin_lock(&pcl->obj.lockref.lock);
if (!pcl->compressed_bvecs[i].page) {
pcl->compressed_bvecs[i].page = t;
spin_unlock(&pcl->obj.lockref.lock);
continue;
}
spin_unlock(&pcl->obj.lockref.lock);
if (page)
put_page(page);
......@@ -694,7 +699,7 @@ static void z_erofs_cache_invalidate_folio(struct folio *folio,
DBG_BUGON(stop > folio_size(folio) || stop < length);
if (offset == 0 && stop == folio_size(folio))
while (!z_erofs_cache_release_folio(folio, GFP_NOFS))
while (!z_erofs_cache_release_folio(folio, 0))
cond_resched();
}
......@@ -713,36 +718,30 @@ int erofs_init_managed_cache(struct super_block *sb)
set_nlink(inode, 1);
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &z_erofs_cache_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
EROFS_SB(sb)->managed_cache = inode;
return 0;
}
static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
struct z_erofs_bvec *bvec)
{
struct z_erofs_pcluster *const pcl = fe->pcl;
while (fe->icur > 0) {
if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page,
NULL, bvec->page)) {
pcl->compressed_bvecs[fe->icur] = *bvec;
return true;
}
}
return false;
}
/* callers must be with pcluster lock held */
static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
struct z_erofs_bvec *bvec, bool exclusive)
{
struct z_erofs_pcluster *pcl = fe->pcl;
int ret;
if (exclusive) {
/* give priority for inplaceio to use file pages first */
if (z_erofs_try_inplace_io(fe, bvec))
spin_lock(&pcl->obj.lockref.lock);
while (fe->icur > 0) {
if (pcl->compressed_bvecs[--fe->icur].page)
continue;
pcl->compressed_bvecs[fe->icur] = *bvec;
spin_unlock(&pcl->obj.lockref.lock);
return 0;
}
spin_unlock(&pcl->obj.lockref.lock);
/* otherwise, check if it can be used as a bvpage */
if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
!fe->candidate_bvpage)
......@@ -964,7 +963,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
}
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct page *page)
struct page *page, bool ra)
{
struct inode *const inode = fe->inode;
struct erofs_map_blocks *const map = &fe->map;
......@@ -1014,6 +1013,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
err = z_erofs_pcluster_begin(fe);
if (err)
goto out;
fe->pcl->besteffort |= !ra;
}
/*
......@@ -1280,6 +1280,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
.inplace_io = overlapped,
.partial_decoding = pcl->partial,
.fillgaps = pcl->multibases,
.gfp = pcl->besteffort ?
GFP_KERNEL | __GFP_NOFAIL :
GFP_NOWAIT | __GFP_NORETRY
}, be->pagepool);
/* must handle all compressed pages before actual file pages */
......@@ -1322,6 +1325,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
pcl->length = 0;
pcl->partial = true;
pcl->multibases = false;
pcl->besteffort = false;
pcl->bvset.nextpage = NULL;
pcl->vcnt = 0;
......@@ -1423,23 +1427,26 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
{
gfp_t gfp = mapping_gfp_mask(mc);
bool tocache = false;
struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr;
struct z_erofs_bvec zbv;
struct address_space *mapping;
struct page *page, *oldpage;
struct page *page;
int justfound, bs = i_blocksize(f->inode);
/* Except for inplace pages, the entire page can be used for I/Os */
bvec->bv_offset = 0;
bvec->bv_len = PAGE_SIZE;
repeat:
oldpage = READ_ONCE(zbv->page);
if (!oldpage)
spin_lock(&pcl->obj.lockref.lock);
zbv = pcl->compressed_bvecs[nr];
page = zbv.page;
justfound = (unsigned long)page & 1UL;
page = (struct page *)((unsigned long)page & ~1UL);
pcl->compressed_bvecs[nr].page = page;
spin_unlock(&pcl->obj.lockref.lock);
if (!page)
goto out_allocpage;
justfound = (unsigned long)oldpage & 1UL;
page = (struct page *)((unsigned long)oldpage & ~1UL);
bvec->bv_page = page;
DBG_BUGON(z_erofs_is_shortlived_page(page));
/*
* Handle preallocated cached pages. We tried to allocate such pages
......@@ -1448,7 +1455,6 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
*/
if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
set_page_private(page, 0);
WRITE_ONCE(zbv->page, page);
tocache = true;
goto out_tocache;
}
......@@ -1459,9 +1465,9 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
* therefore it is impossible for `mapping` to be NULL.
*/
if (mapping && mapping != mc) {
if (zbv->offset < 0)
bvec->bv_offset = round_up(-zbv->offset, bs);
bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset;
if (zbv.offset < 0)
bvec->bv_offset = round_up(-zbv.offset, bs);
bvec->bv_len = round_up(zbv.end, bs) - bvec->bv_offset;
return;
}
......@@ -1471,7 +1477,6 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
/* the cached page is still in managed cache */
if (page->mapping == mc) {
WRITE_ONCE(zbv->page, page);
/*
* The cached page is still available but without a valid
* `->private` pcluster hint. Let's reconnect them.
......@@ -1503,11 +1508,15 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
put_page(page);
out_allocpage:
page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
if (oldpage != cmpxchg(&zbv->page, oldpage, page)) {
spin_lock(&pcl->obj.lockref.lock);
if (pcl->compressed_bvecs[nr].page) {
erofs_pagepool_add(&f->pagepool, page);
spin_unlock(&pcl->obj.lockref.lock);
cond_resched();
goto repeat;
}
pcl->compressed_bvecs[nr].page = page;
spin_unlock(&pcl->obj.lockref.lock);
bvec->bv_page = page;
out_tocache:
if (!tocache || bs != PAGE_SIZE ||
......@@ -1685,6 +1694,7 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
if (cur + bvec.bv_len > end)
bvec.bv_len = end - cur;
DBG_BUGON(bvec.bv_len < sb->s_blocksize);
if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
bvec.bv_offset))
goto submit_bio_retry;
......@@ -1785,7 +1795,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
if (PageUptodate(page))
unlock_page(page);
else
(void)z_erofs_do_read_page(f, page);
(void)z_erofs_do_read_page(f, page, !!rac);
put_page(page);
}
......@@ -1806,7 +1816,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
z_erofs_pcluster_readmore(&f, NULL, true);
err = z_erofs_do_read_page(&f, &folio->page);
err = z_erofs_do_read_page(&f, &folio->page, false);
z_erofs_pcluster_readmore(&f, NULL, false);
z_erofs_pcluster_end(&f);
......@@ -1847,7 +1857,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
folio = head;
head = folio_get_private(folio);
err = z_erofs_do_read_page(&f, &folio->page);
err = z_erofs_do_read_page(&f, &folio->page, true);
if (err && err != -EINTR)
erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
folio->index, EROFS_I(inode)->nid);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment