squashfs: cache partial compressed blocks

Before commit 93e72b3c ("squashfs: migrate from ll_rw_block usage to BIO"), compressed blocks read by squashfs were cached in the page cache, but that is not the case after that commit. That has lead to squashfs having to re-read a lot of sectors from disk/flash. For example, the first sectors of every metadata block need to be read twice from the disk. Once partially to read the length, and a second time to read the block itself. Also, in linear reads of large files, the last sectors of one data block are re-read from disk when reading the next data block, since the compressed blocks are of variable sizes and not aligned to device blocks. This extra I/O results in a degrade in read performance of, for example, ~16% in one scenario on my ARM platform using squashfs with dm-verity and NAND. Since the decompressed data is cached in the page cache or squashfs' internal metadata and fragment caches, caching _all_ compressed pages would lead to a lot of double caching and is undesirable. But make the code cache any disk blocks which were only partially requested, since these are the ones likely to include data which is needed by other file system blocks. This restores read performance in my test scenario. The compressed block caching is only applied when the disk block size is equal to the page size, to avoid having to deal with caching sub-page reads. [akpm@linux-foundation.org: fs/squashfs/block.c needs linux/pagemap.h] [vincent.whitchurch@axis.com: fix page update race] Link: https://lkml.kernel.org/r/20230526-squashfs-cache-fixup-v1-1-d54a7fa23e7b@axis.com [vincent.whitchurch@axis.com: fix page indices] Link: https://lkml.kernel.org/r/20230526-squashfs-cache-fixup-v1-2-d54a7fa23e7b@axis.com [akpm@linux-foundation.org: fix layout, per hch] Link: https://lkml.kernel.org/r/20230510-squashfs-cache-v4-1-3bd394e1ee71@axis.comSigned-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Phillip Lougher <phillip@squashfs.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

squashfs: cache partial compressed blocks
Before commit 93e72b3c ("squashfs: migrate from ll_rw_block usage to BIO"), compressed blocks read by squashfs were cached in the page cache, but that is not the case after that commit. That has lead to squashfs having to re-read a lot of sectors from disk/flash. For example, the first sectors of every metadata block need to be read twice from the disk. Once partially to read the length, and a second time to read the block itself. Also, in linear reads of large files, the last sectors of one data block are re-read from disk when reading the next data block, since the compressed blocks are of variable sizes and not aligned to device blocks. This extra I/O results in a degrade in read performance of, for example, ~16% in one scenario on my ARM platform using squashfs with dm-verity and NAND. Since the decompressed data is cached in the page cache or squashfs' internal metadata and fragment caches, caching _all_ compressed pages would lead to a lot of double caching and is undesirable. But make the code cache any disk blocks which were only partially requested, since these are the ones likely to include data which is needed by other file system blocks. This restores read performance in my test scenario. The compressed block caching is only applied when the disk block size is equal to the page size, to avoid having to deal with caching sub-page reads. [akpm@linux-foundation.org: fs/squashfs/block.c needs linux/pagemap.h] [vincent.whitchurch@axis.com: fix page update race] Link: https://lkml.kernel.org/r/20230526-squashfs-cache-fixup-v1-1-d54a7fa23e7b@axis.com [vincent.whitchurch@axis.com: fix page indices] Link: https://lkml.kernel.org/r/20230526-squashfs-cache-fixup-v1-2-d54a7fa23e7b@axis.com [akpm@linux-foundation.org: fix layout, per hch] Link: https://lkml.kernel.org/r/20230510-squashfs-cache-v4-1-3bd394e1ee71@axis.comSigned-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Phillip Lougher <phillip@squashfs.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
e994f5b6 · Vincent Whitchurch · Andrew Morton · 6b81459c · e994f5b6 · e994f5b6
Commit e994f5b6 authored May 17, 2023 by Vincent Whitchurch Committed by Andrew Morton Jun 09, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 129 additions and 6 deletions

fs/squashfs/block.c fs/squashfs/block.c +111 -6

fs/squashfs/squashfs_fs_sb.h fs/squashfs/squashfs_fs_sb.h +1 -0

fs/squashfs/super.c fs/squashfs/super.c +17 -0

No files found.
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -17,6 +17,7 @@
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <linux/string.h>
 #include <linux/bio.h>

@@ -75,10 +76,101 @@ static int copy_bio_to_actor(struct bio *bio,
 	return copied_bytes;
 }

+static int squashfs_bio_read_cached(struct bio *fullbio,
+		struct address_space *cache_mapping, u64 index, int length,
+		u64 read_start, u64 read_end, int page_count)
+{
+	struct page *head_to_cache = NULL, *tail_to_cache = NULL;
+	struct block_device *bdev = fullbio->bi_bdev;
+	int start_idx = 0, end_idx = 0;
+	struct bvec_iter_all iter_all;
+	struct bio *bio = NULL;
+	struct bio_vec *bv;
+	int idx = 0;
+	int err = 0;
+
+	bio_for_each_segment_all(bv, fullbio, iter_all) {
+		struct page *page = bv->bv_page;
+
+		if (page->mapping == cache_mapping) {
+			idx++;
+			continue;
+		}
+
+		/*
+		 * We only use this when the device block size is the same as
+		 * the page size, so read_start and read_end cover full pages.
+		 *
+		 * Compare these to the original required index and length to
+		 * only cache pages which were requested partially, since these
+		 * are the ones which are likely to be needed when reading
+		 * adjacent blocks.
+		 */
+		if (idx == 0 && index != read_start)
+			head_to_cache = page;
+		else if (idx == page_count - 1 && index + length != read_end)
+			tail_to_cache = page;
+
+		if (!bio || idx != end_idx) {
+			struct bio *new = bio_alloc_clone(bdev, fullbio,
+							  GFP_NOIO, &fs_bio_set);
+
+			if (bio) {
+				bio_trim(bio, start_idx * PAGE_SECTORS,
+					 (end_idx - start_idx) * PAGE_SECTORS);
+				bio_chain(bio, new);
+				submit_bio(bio);
+			}
+
+			bio = new;
+			start_idx = idx;
+		}
+
+		idx++;
+		end_idx = idx;
+	}
+
+	if (bio) {
+		bio_trim(bio, start_idx * PAGE_SECTORS,
+			 (end_idx - start_idx) * PAGE_SECTORS);
+		err = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+
+	if (err)
+		return err;
+
+	if (head_to_cache) {
+		int ret = add_to_page_cache_lru(head_to_cache, cache_mapping,
+						read_start >> PAGE_SHIFT,
+						GFP_NOIO);
+
+		if (!ret) {
+			SetPageUptodate(head_to_cache);
+			unlock_page(head_to_cache);
+		}
+
+	}
+
+	if (tail_to_cache) {
+		int ret = add_to_page_cache_lru(tail_to_cache, cache_mapping,
+						(read_end >> PAGE_SHIFT) - 1,
+						GFP_NOIO);
+
+		if (!ret) {
+			SetPageUptodate(tail_to_cache);
+			unlock_page(tail_to_cache);
+		}
+	}
+
+	return 0;
+}
+
 static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
 			     struct bio **biop, int *block_offset)
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
+	struct address_space *cache_mapping = msblk->cache_mapping;
 	const u64 read_start = round_down(index, msblk->devblksize);
 	const sector_t block = read_start >> msblk->devblksize_log2;
 	const u64 read_end = round_up(index + length, msblk->devblksize);
@@ -98,21 +190,34 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
 	for (i = 0; i < page_count; ++i) {
 		unsigned int len =
 			min_t(unsigned int, PAGE_SIZE - offset, total_len);
-		struct page *page = alloc_page(GFP_NOIO);
+		struct page *page = NULL;
+
+		if (cache_mapping)
+			page = find_get_page(cache_mapping,
+					     (read_start >> PAGE_SHIFT) + i);
+		if (!page)
+			page = alloc_page(GFP_NOIO);

 		if (!page) {
 			error = -ENOMEM;
 			goto out_free_bio;
 		}
-		if (!bio_add_page(bio, page, len, offset)) {
-			error = -EIO;
-			goto out_free_bio;
-		}
+
+		/*
+		 * Use the __ version to avoid merging since we need each page
+		 * to be separate when we check for and avoid cached pages.
+		 */
+		__bio_add_page(bio, page, len, offset);
 		offset = 0;
 		total_len -= len;
 	}

-	error = submit_bio_wait(bio);
+	if (cache_mapping)
+		error = squashfs_bio_read_cached(bio, cache_mapping, index,
+						 length, read_start, read_end,
+						 page_count);
+	else
+		error = submit_bio_wait(bio);
 	if (error)
 		goto out_free_bio;


--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -47,6 +47,7 @@ struct squashfs_sb_info {
 	struct squashfs_cache			*block_cache;
 	struct squashfs_cache			*fragment_cache;
 	struct squashfs_cache			*read_page;
+	struct address_space			*cache_mapping;
 	int					next_meta_index;
 	__le64					*id_table;
 	__le64					*fragment_index;

--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -329,6 +329,19 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
 		goto failed_mount;
 	}

+	if (msblk->devblksize == PAGE_SIZE) {
+		struct inode *cache = new_inode(sb);
+
+		if (cache == NULL)
+			goto failed_mount;
+
+		set_nlink(cache, 1);
+		cache->i_size = OFFSET_MAX;
+		mapping_set_gfp_mask(cache->i_mapping, GFP_NOFS);
+
+		msblk->cache_mapping = cache->i_mapping;
+	}
+
 	msblk->stream = squashfs_decompressor_setup(sb, flags);
 	if (IS_ERR(msblk->stream)) {
 		err = PTR_ERR(msblk->stream);
@@ -454,6 +467,8 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
 	squashfs_cache_delete(msblk->block_cache);
 	squashfs_cache_delete(msblk->fragment_cache);
 	squashfs_cache_delete(msblk->read_page);
+	if (msblk->cache_mapping)
+		iput(msblk->cache_mapping->host);
 	msblk->thread_ops->destroy(msblk);
 	kfree(msblk->inode_lookup_table);
 	kfree(msblk->fragment_index);
@@ -572,6 +587,8 @@ static void squashfs_put_super(struct super_block *sb)
 		squashfs_cache_delete(sbi->block_cache);
 		squashfs_cache_delete(sbi->fragment_cache);
 		squashfs_cache_delete(sbi->read_page);
+		if (sbi->cache_mapping)
+			iput(sbi->cache_mapping->host);
 		sbi->thread_ops->destroy(sbi);
 		kfree(sbi->id_table);
 		kfree(sbi->fragment_index);