Commit cecf864d authored by Yue Hu, committed by Gao Xiang

erofs: support inline data decompression

Currently, we already support tail-packing inline for
uncompressed files; let's also implement it for compressed
files to save I/Os and storage space.

Different from normal pclusters, the compressed data of inline
pclusters is already available in advance because of other metadata
I/Os, so such pclusters move directly into the bypass queue without
any extra I/O submission.

It's the last compression feature before folio/subpage support.

Link: https://lore.kernel.org/r/20211228232919.21413-1-xiang@kernel.org
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Yue Hu <huyue2@yulong.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
parent ab749bad
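
As a rough illustration of the bypass-queue behaviour described in the
commit message, here is a minimal userspace sketch (an assumption-laden
model, not kernel code: the struct below is a simplified stand-in for
struct z_erofs_pcluster, and only the idea that obj.index == 0 marks a
tail-packing inline pcluster mirrors the patch):

#include <stdbool.h>
#include <stdio.h>

/* simplified stand-in for struct z_erofs_pcluster (illustration only) */
struct pcluster {
	unsigned long index;		/* 0 marks a tail-packing inline pcluster */
	unsigned int pclusterpages;	/* compressed size in pages otherwise */
	unsigned int tailpacking_size;	/* inline compressed size in bytes */
};

static bool is_inline_pcluster(const struct pcluster *pcl)
{
	return !pcl->index;	/* same test as z_erofs_is_inline_pcluster() */
}

/* model of the submission loop: inline pclusters take the bypass queue */
static void submit_one(const struct pcluster *pcl)
{
	if (is_inline_pcluster(pcl)) {
		/* data already arrived with the metadata block: no read bio */
		printf("inline pcluster, %u bytes, moved to bypass queue\n",
		       pcl->tailpacking_size);
		return;
	}
	printf("pcluster at block %lu: submit read of %u page(s)\n",
	       pcl->index, pcl->pclusterpages);
}

int main(void)
{
	struct pcluster normal = { .index = 1024, .pclusterpages = 4 };
	struct pcluster tail = { .index = 0, .tailpacking_size = 300 };

	submit_one(&normal);
	submit_one(&tail);
	return 0;
}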
fs/erofs/zdata.c
@@ -82,12 +82,13 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages)
 
 static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
 {
+	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
 		struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;
 
-		if (pcl->pclusterpages > pcs->maxpages)
+		if (pclusterpages > pcs->maxpages)
 			continue;
 
 		kmem_cache_free(pcs->slab, pcl);
@@ -298,6 +299,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
 		container_of(grp, struct z_erofs_pcluster, obj);
 	int i;
 
+	DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
 	/*
 	 * refcount of workgroup is now freezed as 1,
	 * therefore no need to worry about available decompression users.
@@ -331,6 +333,7 @@ int erofs_try_to_free_cached_page(struct page *page)
 	if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
 		unsigned int i;
 
+		DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
 		for (i = 0; i < pcl->pclusterpages; ++i) {
 			if (pcl->compressed_pages[i] == page) {
 				WRITE_ONCE(pcl->compressed_pages[i], NULL);
@@ -458,6 +461,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 				       struct inode *inode,
 				       struct erofs_map_blocks *map)
 {
+	bool ztailpacking = map->m_flags & EROFS_MAP_META;
 	struct z_erofs_pcluster *pcl;
 	struct z_erofs_collection *cl;
 	struct erofs_workgroup *grp;
@@ -469,12 +473,12 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 	}
 
 	/* no available pcluster, let's allocate one */
-	pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
+	pcl = z_erofs_alloc_pcluster(ztailpacking ? 1 :
+				     map->m_plen >> PAGE_SHIFT);
 	if (IS_ERR(pcl))
 		return PTR_ERR(pcl);
 
 	atomic_set(&pcl->obj.refcount, 1);
-	pcl->obj.index = map->m_pa >> PAGE_SHIFT;
 	pcl->algorithmformat = map->m_algorithmformat;
 	pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
 		(map->m_flags & EROFS_MAP_FULL_MAPPED ?
@@ -494,6 +498,13 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 	mutex_init(&cl->lock);
 	DBG_BUGON(!mutex_trylock(&cl->lock));
 
+	if (ztailpacking) {
+		pcl->obj.index = 0;	/* which indicates ztailpacking */
+		pcl->pageofs_in = erofs_blkoff(map->m_pa);
+		pcl->tailpacking_size = map->m_plen;
+	} else {
+		pcl->obj.index = map->m_pa >> PAGE_SHIFT;
+
 		grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
 		if (IS_ERR(grp)) {
 			err = PTR_ERR(grp);
@@ -501,10 +512,12 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 		}
 
 		if (grp != &pcl->obj) {
-			clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+			clt->pcl = container_of(grp,
+					struct z_erofs_pcluster, obj);
 			err = -EEXIST;
 			goto err_out;
 		}
+	}
 	/* used to check tail merging loop due to corrupted images */
 	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
 		clt->tailpcl = pcl;
@@ -532,17 +545,20 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
 	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
 	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
 
-	if (!PAGE_ALIGNED(map->m_pa)) {
+	if (map->m_flags & EROFS_MAP_META) {
+		if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
 			DBG_BUGON(1);
-		return -EINVAL;
+			return -EFSCORRUPTED;
+		}
+		goto tailpacking;
 	}
 
 	grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
 	if (grp) {
 		clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
 	} else {
+tailpacking:
 		ret = z_erofs_register_collection(clt, inode, map);
 		if (!ret)
 			goto out;
 		if (ret != -EEXIST)
@@ -558,9 +574,9 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
 out:
 	z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
 				  clt->cl->pagevec, clt->cl->vcnt);
 	/* since file-backed online pages are traversed in reverse order */
-	clt->icpage_ptr = clt->pcl->compressed_pages + clt->pcl->pclusterpages;
+	clt->icpage_ptr = clt->pcl->compressed_pages +
+			z_erofs_pclusterpages(clt->pcl);
 	return 0;
 }
@@ -681,14 +697,33 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 	if (err)
 		goto err_out;
 
-	/* preload all compressed pages (maybe downgrade role if necessary) */
-	if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la))
+	if (z_erofs_is_inline_pcluster(clt->pcl)) {
+		struct page *mpage;
+
+		mpage = erofs_get_meta_page(inode->i_sb,
+					    erofs_blknr(map->m_pa));
+		if (IS_ERR(mpage)) {
+			err = PTR_ERR(mpage);
+			erofs_err(inode->i_sb,
+				  "failed to get inline page, err %d", err);
+			goto err_out;
+		}
+		/* TODO: new subpage feature will get rid of it */
+		unlock_page(mpage);
+
+		WRITE_ONCE(clt->pcl->compressed_pages[0], mpage);
+		clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+	} else {
+		/* preload all compressed pages (can change mode if needed) */
+		if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy,
+					       map->m_la))
 			cache_strategy = TRYALLOC;
 		else
 			cache_strategy = DONTALLOC;
 
 		preload_compressed_pages(clt, MNGD_MAPPING(sbi),
 					 cache_strategy, pagepool);
+	}
 
 hitted:
 	/*
@@ -844,6 +879,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
 				       struct page **pagepool)
 {
 	struct erofs_sb_info *const sbi = EROFS_SB(sb);
+	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
 	struct z_erofs_pagevec_ctor ctor;
 	unsigned int i, inputsize, outputsize, llen, nr_pages;
 	struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
@@ -925,15 +961,20 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
 	overlapped = false;
 	compressed_pages = pcl->compressed_pages;
 
-	for (i = 0; i < pcl->pclusterpages; ++i) {
+	for (i = 0; i < pclusterpages; ++i) {
 		unsigned int pagenr;
 
 		page = compressed_pages[i];
-
 		/* all compressed pages ought to be valid */
 		DBG_BUGON(!page);
-		DBG_BUGON(z_erofs_page_is_invalidated(page));
 
+		if (z_erofs_is_inline_pcluster(pcl)) {
+			if (!PageUptodate(page))
+				err = -EIO;
+			continue;
+		}
+
+		DBG_BUGON(z_erofs_page_is_invalidated(page));
 		if (!z_erofs_is_shortlived_page(page)) {
 			if (erofs_page_is_managed(sbi, page)) {
 				if (!PageUptodate(page))
@@ -978,11 +1019,16 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
 		partial = true;
 	}
 
-	inputsize = pcl->pclusterpages * PAGE_SIZE;
+	if (z_erofs_is_inline_pcluster(pcl))
+		inputsize = pcl->tailpacking_size;
+	else
+		inputsize = pclusterpages * PAGE_SIZE;
+
 	err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
 		.sb = sb,
 		.in = compressed_pages,
 		.out = pages,
+		.pageofs_in = pcl->pageofs_in,
 		.pageofs_out = cl->pageofs,
 		.inputsize = inputsize,
 		.outputsize = outputsize,
@@ -992,8 +1038,13 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
 	}, pagepool);
 
 out:
-	/* must handle all compressed pages before ending pages */
-	for (i = 0; i < pcl->pclusterpages; ++i) {
+	/* must handle all compressed pages before actual file pages */
+	if (z_erofs_is_inline_pcluster(pcl)) {
+		page = compressed_pages[0];
+		WRITE_ONCE(compressed_pages[0], NULL);
+		put_page(page);
+	} else {
+		for (i = 0; i < pclusterpages; ++i) {
 			page = compressed_pages[i];
 
 			if (erofs_page_is_managed(sbi, page))
@@ -1001,9 +1052,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
 				continue;
 
 			/* recycle all individual short-lived pages */
 			(void)z_erofs_put_shortlivedpage(pagepool, page);
-
 			WRITE_ONCE(compressed_pages[i], NULL);
 		}
+	}
 
 	for (i = 0; i < nr_pages; ++i) {
 		page = pages[i];
@@ -1288,6 +1339,14 @@ static void z_erofs_submit_queue(struct super_block *sb,
 		pcl = container_of(owned_head, struct z_erofs_pcluster, next);
 
+		/* close the main owned chain at first */
+		owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
+				     Z_EROFS_PCLUSTER_TAIL_CLOSED);
+		if (z_erofs_is_inline_pcluster(pcl)) {
+			move_to_bypass_jobqueue(pcl, qtail, owned_head);
+			continue;
+		}
+
 		/* no device id here, thus it will always succeed */
 		mdev = (struct erofs_map_dev) {
 			.m_pa = blknr_to_addr(pcl->obj.index),
@@ -1297,10 +1356,6 @@ static void z_erofs_submit_queue(struct super_block *sb,
 		cur = erofs_blknr(mdev.m_pa);
 		end = cur + pcl->pclusterpages;
 
-		/* close the main owned chain at first */
-		owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
-				     Z_EROFS_PCLUSTER_TAIL_CLOSED);
-
 		do {
 			struct page *page;
fs/erofs/zdata.h
@@ -62,9 +62,17 @@ struct z_erofs_pcluster {
 	/* A: lower limit of decompressed length and if full length or not */
 	unsigned int length;
 
+	/* I: page offset of inline compressed data */
+	unsigned short pageofs_in;
+
+	union {
 		/* I: physical cluster size in pages */
 		unsigned short pclusterpages;
 
+		/* I: tailpacking inline compressed size */
+		unsigned short tailpacking_size;
+	};
+
 	/* I: compression algorithm format */
 	unsigned char algorithmformat;
@@ -94,6 +102,18 @@ struct z_erofs_decompressqueue {
 	} u;
 };
 
+static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
+{
+	return !pcl->obj.index;
+}
+
+static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
+{
+	if (z_erofs_is_inline_pcluster(pcl))
+		return 1;
+	return pcl->pclusterpages;
+}
+
 #define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
 #define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
 #define Z_EROFS_ONLINEPAGE_INDEX_SHIFT	(Z_EROFS_ONLINEPAGE_COUNT_BITS)
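
On the decompression side, the following small userspace example (again a
hedged sketch with simplified stand-ins, not the kernel code) shows the
input-size selection the patch introduces in z_erofs_decompress_pcluster():
an inline pcluster decompresses tailpacking_size bytes that start at
pageofs_in within its cached metadata page, while a regular pcluster still
feeds pclusterpages whole pages.

#include <stdio.h>

#define PAGE_SIZE 4096u

/*
 * Mirrors the inputsize selection added by this patch; plain values are
 * used here instead of the kernel's pcluster fields.
 */
static unsigned int input_size(int is_inline, unsigned int pclusterpages,
			       unsigned int tailpacking_size)
{
	return is_inline ? tailpacking_size : pclusterpages * PAGE_SIZE;
}

int main(void)
{
	/* hypothetical sizes, only to show the two cases */
	printf("regular, 2 pages: %u bytes of compressed input\n",
	       input_size(0, 2, 0));
	printf("inline, 700 bytes at in-page offset 128: %u bytes of input\n",
	       input_size(1, 0, 700));
	return 0;
}

In the kernel code the in-page start of the inline data is additionally
passed to the decompressor via the new .pageofs_in field of
struct z_erofs_decompress_req, as the diff above shows.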