// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/scrub.h" #include "scrub/trace.h" #include <linux/shmem_fs.h> /* * Swappable Temporary Memory * ========================== * * Online checking sometimes needs to be able to stage a large amount of data * in memory. This information might not fit in the available memory and it * doesn't all need to be accessible at all times. In other words, we want an * indexed data buffer to store data that can be paged out. * * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those * requirements. Therefore, the xfile mechanism uses an unlinked shmem file to * store our staging data. This file is not installed in the file descriptor * table so that user programs cannot access the data, which means that the * xfile must be freed with xfile_destroy. * * xfiles assume that the caller will handle all required concurrency * management; standard vfs locks (freezer and inode) are not taken. Reads * and writes are satisfied directly from the page cache. */ /* * xfiles must not be exposed to userspace and require upper layers to * coordinate access to the one handle returned by the constructor, so * establish a separate lock class for xfiles to avoid confusing lockdep. */ static struct lock_class_key xfile_i_mutex_key; /* * Create an xfile of the given size. The description will be used in the * trace output. */ int xfile_create( const char *description, loff_t isize, struct xfile **xfilep) { struct inode *inode; struct xfile *xf; int error; xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS); if (!xf) return -ENOMEM; xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE); if (IS_ERR(xf->file)) { error = PTR_ERR(xf->file); goto out_xfile; } inode = file_inode(xf->file); lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key); /* * We don't want to bother with kmapping data during repair, so don't * allow highmem pages to back this mapping. */ mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); trace_xfile_create(xf); *xfilep = xf; return 0; out_xfile: kfree(xf); return error; } /* Close the file and release all resources. */ void xfile_destroy( struct xfile *xf) { struct inode *inode = file_inode(xf->file); trace_xfile_destroy(xf); lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key); fput(xf->file); kfree(xf); } /* * Load an object. Since we're treating this file as "memory", any error or * short IO is treated as a failure to allocate memory. */ int xfile_load( struct xfile *xf, void *buf, size_t count, loff_t pos) { struct inode *inode = file_inode(xf->file); unsigned int pflags; if (count > MAX_RW_COUNT) return -ENOMEM; if (inode->i_sb->s_maxbytes - pos < count) return -ENOMEM; trace_xfile_load(xf, pos, count); pflags = memalloc_nofs_save(); while (count > 0) { struct folio *folio; unsigned int len; unsigned int offset; if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_READ) < 0) break; if (!folio) { /* * No data stored at this offset, just zero the output * buffer until the next page boundary. */ len = min_t(ssize_t, count, PAGE_SIZE - offset_in_page(pos)); memset(buf, 0, len); } else { if (filemap_check_wb_err(inode->i_mapping, 0)) { folio_unlock(folio); folio_put(folio); break; } offset = offset_in_folio(folio, pos); len = min_t(ssize_t, count, folio_size(folio) - offset); memcpy(buf, folio_address(folio) + offset, len); folio_unlock(folio); folio_put(folio); } count -= len; pos += len; buf += len; } memalloc_nofs_restore(pflags); if (count) return -ENOMEM; return 0; } /* * Store an object. Since we're treating this file as "memory", any error or * short IO is treated as a failure to allocate memory. */ int xfile_store( struct xfile *xf, const void *buf, size_t count, loff_t pos) { struct inode *inode = file_inode(xf->file); unsigned int pflags; if (count > MAX_RW_COUNT) return -ENOMEM; if (inode->i_sb->s_maxbytes - pos < count) return -ENOMEM; trace_xfile_store(xf, pos, count); /* * Increase the file size first so that shmem_get_folio(..., SGP_CACHE), * actually allocates a folio instead of erroring out. */ if (pos + count > i_size_read(inode)) i_size_write(inode, pos + count); pflags = memalloc_nofs_save(); while (count > 0) { struct folio *folio; unsigned int len; unsigned int offset; if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE) < 0) break; if (filemap_check_wb_err(inode->i_mapping, 0)) { folio_unlock(folio); folio_put(folio); break; } offset = offset_in_folio(folio, pos); len = min_t(ssize_t, count, folio_size(folio) - offset); memcpy(folio_address(folio) + offset, buf, len); folio_mark_dirty(folio); folio_unlock(folio); folio_put(folio); count -= len; pos += len; buf += len; } memalloc_nofs_restore(pflags); if (count) return -ENOMEM; return 0; } /* Find the next written area in the xfile data for a given offset. */ loff_t xfile_seek_data( struct xfile *xf, loff_t pos) { loff_t ret; ret = vfs_llseek(xf->file, pos, SEEK_DATA); trace_xfile_seek_data(xf, pos, ret); return ret; } /* * Grab the (locked) page for a memory object. The object cannot span a page * boundary. Returns 0 (and a locked page) if successful, -ENOTBLK if we * cannot grab the page, or the usual negative errno. */ int xfile_get_page( struct xfile *xf, loff_t pos, unsigned int len, struct xfile_page *xfpage) { struct inode *inode = file_inode(xf->file); struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; struct page *page = NULL; void *fsdata = NULL; loff_t key = round_down(pos, PAGE_SIZE); unsigned int pflags; int error; if (inode->i_sb->s_maxbytes - pos < len) return -ENOMEM; if (len > PAGE_SIZE - offset_in_page(pos)) return -ENOTBLK; trace_xfile_get_page(xf, pos, len); pflags = memalloc_nofs_save(); /* * We call write_begin directly here to avoid all the freezer * protection lock-taking that happens in the normal path. shmem * doesn't support fs freeze, but lockdep doesn't know that and will * trip over that. */ error = aops->write_begin(NULL, mapping, key, PAGE_SIZE, &page, &fsdata); if (error) goto out_pflags; /* We got the page, so make sure we push out EOF. */ if (i_size_read(inode) < pos + len) i_size_write(inode, pos + len); /* * If the page isn't up to date, fill it with zeroes before we hand it * to the caller and make sure the backing store will hold on to them. */ if (!PageUptodate(page)) { memset(page_address(page), 0, PAGE_SIZE); SetPageUptodate(page); } /* * Mark each page dirty so that the contents are written to some * backing store when we drop this buffer, and take an extra reference * to prevent the xfile page from being swapped or removed from the * page cache by reclaim if the caller unlocks the page. */ set_page_dirty(page); get_page(page); xfpage->page = page; xfpage->fsdata = fsdata; xfpage->pos = key; out_pflags: memalloc_nofs_restore(pflags); return error; } /* * Release the (locked) page for a memory object. Returns 0 or a negative * errno. */ int xfile_put_page( struct xfile *xf, struct xfile_page *xfpage) { struct inode *inode = file_inode(xf->file); struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; unsigned int pflags; int ret; trace_xfile_put_page(xf, xfpage->pos, PAGE_SIZE); /* Give back the reference that we took in xfile_get_page. */ put_page(xfpage->page); pflags = memalloc_nofs_save(); ret = aops->write_end(NULL, mapping, xfpage->pos, PAGE_SIZE, PAGE_SIZE, xfpage->page, xfpage->fsdata); memalloc_nofs_restore(pflags); memset(xfpage, 0, sizeof(struct xfile_page)); if (ret < 0) return ret; if (ret != PAGE_SIZE) return -EIO; return 0; }