Commit 0ee7c3e2 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'iomap-5.15-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull iomap updates from Darrick Wong:
 "The most notable externally visible change for this cycle is the
  addition of support for reads to inline tail fragments of files, which
  was requested by the erofs developers; and a correction for a kernel
  memory corruption bug if the sysadmin tries to activate a swapfile
  with more pages than the swapfile header suggests.

  We also now report writeback completion errors to the file mapping
  correctly, instead of munging all errors into EIO.

  Internally, the bulk of the changes are Christoph's patchset to reduce
  the indirect function call count by a third to a half by converting
  iomap iteration from a loop pattern to a generator/consumer pattern.
  As an added bonus, fsdax no longer open-codes iomap apply loops.

  Summary:

   - Simplify the bio_end_page usage in the buffered IO code.

   - Support reading inline data at nonzero offsets for erofs.

   - Fix some typos and bad grammar.

   - Convert kmap_atomic usage in the inline data read path.

   - Add some extra inline data input checking.

   - Fix a memory corruption bug stemming from iomap_swapfile_activate
     trying to activate more pages than mm was expecting.

   - Pass errnos through the page writeback code so that writeback
     errors are reported correctly instead of being munged to EIO.

   - Replace iomap_apply with a open-coded iterator loops to reduce the
     number of indirect calls by a third to a half.

   - Refactor the fsdax code to use iomap iterators instead of the
     open-coded iomap_apply code that it had before.

   - Format file range iomap tracepoint data in hexadecimal and
     standardize the names used in the pretty-print string"

* tag 'iomap-5.15-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (41 commits)
  iomap: standardize tracepoint formatting and storage
  mm/swap: consider max pages in iomap_swapfile_add_extent
  iomap: move loop control code to iter.c
  iomap: constify iomap_iter_srcmap
  fsdax: switch the fault handlers to use iomap_iter
  fsdax: factor out a dax_fault_actor() helper
  fsdax: factor out helpers to simplify the dax fault code
  iomap: rework unshare flag
  iomap: pass an iomap_iter to various buffered I/O helpers
  iomap: remove iomap_apply
  fsdax: switch dax_iomap_rw to use iomap_iter
  iomap: switch iomap_swapfile_activate to use iomap_iter
  iomap: switch iomap_seek_data to use iomap_iter
  iomap: switch iomap_seek_hole to use iomap_iter
  iomap: switch iomap_bmap to use iomap_iter
  iomap: switch iomap_fiemap to use iomap_iter
  iomap: switch __iomap_dio_rw to use iomap_iter
  iomap: switch iomap_page_mkwrite to use iomap_iter
  iomap: switch iomap_zero_range to use iomap_iter
  iomap: switch iomap_file_unshare to use iomap_iter
  ...
parents 916d636e 03b8df8d
......@@ -8247,9 +8247,10 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
return dip;
}
static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter,
struct bio *dio_bio, loff_t file_offset)
{
struct inode *inode = iter->inode;
const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
......@@ -8265,7 +8266,7 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
int ret;
blk_status_t status;
struct btrfs_io_geometry geom;
struct btrfs_dio_data *dio_data = iomap->private;
struct btrfs_dio_data *dio_data = iter->iomap.private;
struct extent_map *em = NULL;
dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
......
......@@ -1912,7 +1912,7 @@ EXPORT_SYMBOL(page_zero_new_buffers);
static void
iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
struct iomap *iomap)
const struct iomap *iomap)
{
loff_t offset = block << inode->i_blkbits;
......@@ -1966,7 +1966,7 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
}
int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap)
get_block_t *get_block, const struct iomap *iomap)
{
unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
......
This diff is collapsed.
......@@ -1002,7 +1002,7 @@ static void gfs2_write_unlock(struct inode *inode)
}
static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
unsigned len, struct iomap *iomap)
unsigned len)
{
unsigned int blockmask = i_blocksize(inode) - 1;
struct gfs2_sbd *sdp = GFS2_SB(inode);
......@@ -1013,8 +1013,7 @@ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
}
static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
unsigned copied, struct page *page,
struct iomap *iomap)
unsigned copied, struct page *page)
{
struct gfs2_trans *tr = current->journal_info;
struct gfs2_inode *ip = GFS2_I(inode);
......
......@@ -48,8 +48,8 @@ static inline int emergency_thaw_bdev(struct super_block *sb)
/*
* buffer.c
*/
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap);
int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, const struct iomap *iomap);
/*
* char_dev.c
......
......@@ -9,9 +9,9 @@ ccflags-y += -I $(srctree)/$(src) # needed for trace events
obj-$(CONFIG_FS_IOMAP) += iomap.o
iomap-y += trace.o \
apply.o \
buffered-io.o \
direct-io.o \
fiemap.o \
iter.o \
seek.o
iomap-$(CONFIG_SWAP) += swapfile.o
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010 Red Hat, Inc.
* Copyright (c) 2016-2018 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include "trace.h"
/*
* Execute a iomap write on a segment of the mapping that spans a
* contiguous range of pages that have identical block mapping state.
*
* This avoids the need to map pages individually, do individual allocations
* for each page and most importantly avoid the need for filesystem specific
* locking per page. Instead, all the operations are amortised over the entire
* range of pages. It is assumed that the filesystems will lock whatever
* resources they require in the iomap_begin call, and release them in the
* iomap_end call.
*/
loff_t
iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
const struct iomap_ops *ops, void *data, iomap_actor_t actor)
{
struct iomap iomap = { .type = IOMAP_HOLE };
struct iomap srcmap = { .type = IOMAP_HOLE };
loff_t written = 0, ret;
u64 end;
trace_iomap_apply(inode, pos, length, flags, ops, actor, _RET_IP_);
/*
* Need to map a range from start position for length bytes. This can
* span multiple pages - it is only guaranteed to return a range of a
* single type of pages (e.g. all into a hole, all mapped or all
* unwritten). Failure at this point has nothing to undo.
*
* If allocation is required for this range, reserve the space now so
* that the allocation is guaranteed to succeed later on. Once we copy
* the data into the page cache pages, then we cannot fail otherwise we
* expose transient stale data. If the reserve fails, we can safely
* back out at this point as there is nothing to undo.
*/
ret = ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
if (ret)
return ret;
if (WARN_ON(iomap.offset > pos)) {
written = -EIO;
goto out;
}
if (WARN_ON(iomap.length == 0)) {
written = -EIO;
goto out;
}
trace_iomap_apply_dstmap(inode, &iomap);
if (srcmap.type != IOMAP_HOLE)
trace_iomap_apply_srcmap(inode, &srcmap);
/*
* Cut down the length to the one actually provided by the filesystem,
* as it might not be able to give us the whole size that we requested.
*/
end = iomap.offset + iomap.length;
if (srcmap.type != IOMAP_HOLE)
end = min(end, srcmap.offset + srcmap.length);
if (pos + length > end)
length = end - pos;
/*
* Now that we have guaranteed that the space allocation will succeed,
* we can do the copy-in page by page without having to worry about
* failures exposing transient data.
*
* To support COW operations, we read in data for partially blocks from
* the srcmap if the file system filled it in. In that case we the
* length needs to be limited to the earlier of the ends of the iomaps.
* If the file system did not provide a srcmap we pass in the normal
* iomap into the actors so that they don't need to have special
* handling for the two cases.
*/
written = actor(inode, pos, length, data, &iomap,
srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
out:
/*
* Now the data has been copied, commit the range we've copied. This
* should not fail unless the filesystem has had a fatal error.
*/
if (ops->iomap_end) {
ret = ops->iomap_end(inode, pos, length,
written > 0 ? written : 0,
flags, &iomap);
}
return written ? written : ret;
}
This diff is collapsed.
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016-2018 Christoph Hellwig.
* Copyright (c) 2016-2021 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
......@@ -8,13 +8,8 @@
#include <linux/iomap.h>
#include <linux/fiemap.h>
struct fiemap_ctx {
struct fiemap_extent_info *fi;
struct iomap prev;
};
static int iomap_to_fiemap(struct fiemap_extent_info *fi,
struct iomap *iomap, u32 flags)
const struct iomap *iomap, u32 flags)
{
switch (iomap->type) {
case IOMAP_HOLE:
......@@ -43,24 +38,22 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
iomap->length, flags);
}
static loff_t
iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct iomap *iomap, struct iomap *srcmap)
static loff_t iomap_fiemap_iter(const struct iomap_iter *iter,
struct fiemap_extent_info *fi, struct iomap *prev)
{
struct fiemap_ctx *ctx = data;
loff_t ret = length;
int ret;
if (iomap->type == IOMAP_HOLE)
return length;
if (iter->iomap.type == IOMAP_HOLE)
return iomap_length(iter);
ret = iomap_to_fiemap(ctx->fi, &ctx->prev, 0);
ctx->prev = *iomap;
ret = iomap_to_fiemap(fi, prev, 0);
*prev = iter->iomap;
switch (ret) {
case 0: /* success */
return length;
return iomap_length(iter);
case 1: /* extent array full */
return 0;
default:
default: /* error */
return ret;
}
}
......@@ -68,73 +61,63 @@ iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
u64 start, u64 len, const struct iomap_ops *ops)
{
struct fiemap_ctx ctx;
loff_t ret;
memset(&ctx, 0, sizeof(ctx));
ctx.fi = fi;
ctx.prev.type = IOMAP_HOLE;
struct iomap_iter iter = {
.inode = inode,
.pos = start,
.len = len,
.flags = IOMAP_REPORT,
};
struct iomap prev = {
.type = IOMAP_HOLE,
};
int ret;
ret = fiemap_prep(inode, fi, start, &len, 0);
ret = fiemap_prep(inode, fi, start, &iter.len, 0);
if (ret)
return ret;
while (len > 0) {
ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx,
iomap_fiemap_actor);
/* inode with no (attribute) mapping will give ENOENT */
if (ret == -ENOENT)
break;
if (ret < 0)
return ret;
if (ret == 0)
break;
start += ret;
len -= ret;
}
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_fiemap_iter(&iter, fi, &prev);
if (ctx.prev.type != IOMAP_HOLE) {
ret = iomap_to_fiemap(fi, &ctx.prev, FIEMAP_EXTENT_LAST);
if (prev.type != IOMAP_HOLE) {
ret = iomap_to_fiemap(fi, &prev, FIEMAP_EXTENT_LAST);
if (ret < 0)
return ret;
}
/* inode with no (attribute) mapping will give ENOENT */
if (ret < 0 && ret != -ENOENT)
return ret;
return 0;
}
EXPORT_SYMBOL_GPL(iomap_fiemap);
static loff_t
iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
void *data, struct iomap *iomap, struct iomap *srcmap)
{
sector_t *bno = data, addr;
if (iomap->type == IOMAP_MAPPED) {
addr = (pos - iomap->offset + iomap->addr) >> inode->i_blkbits;
*bno = addr;
}
return 0;
}
/* legacy ->bmap interface. 0 is the error return (!) */
sector_t
iomap_bmap(struct address_space *mapping, sector_t bno,
const struct iomap_ops *ops)
{
struct inode *inode = mapping->host;
loff_t pos = bno << inode->i_blkbits;
unsigned blocksize = i_blocksize(inode);
struct iomap_iter iter = {
.inode = mapping->host,
.pos = (loff_t)bno << mapping->host->i_blkbits,
.len = i_blocksize(mapping->host),
.flags = IOMAP_REPORT,
};
const unsigned int blkshift = mapping->host->i_blkbits - SECTOR_SHIFT;
int ret;
if (filemap_write_and_wait(mapping))
return 0;
bno = 0;
ret = iomap_apply(inode, pos, blocksize, 0, ops, &bno,
iomap_bmap_actor);
while ((ret = iomap_iter(&iter, ops)) > 0) {
if (iter.iomap.type == IOMAP_MAPPED)
bno = iomap_sector(&iter.iomap, iter.pos) >> blkshift;
/* leave iter.processed unset to abort loop */
}
if (ret)
return 0;
return bno;
}
EXPORT_SYMBOL_GPL(iomap_bmap);
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010 Red Hat, Inc.
* Copyright (c) 2016-2021 Christoph Hellwig.
*/
#include <linux/fs.h>
#include <linux/iomap.h>
#include "trace.h"
static inline int iomap_iter_advance(struct iomap_iter *iter)
{
/* handle the previous iteration (if any) */
if (iter->iomap.length) {
if (iter->processed <= 0)
return iter->processed;
if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
return -EIO;
iter->pos += iter->processed;
iter->len -= iter->processed;
if (!iter->len)
return 0;
}
/* clear the state for the next iteration */
iter->processed = 0;
memset(&iter->iomap, 0, sizeof(iter->iomap));
memset(&iter->srcmap, 0, sizeof(iter->srcmap));
return 1;
}
static inline void iomap_iter_done(struct iomap_iter *iter)
{
WARN_ON_ONCE(iter->iomap.offset > iter->pos);
WARN_ON_ONCE(iter->iomap.length == 0);
WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos);
trace_iomap_iter_dstmap(iter->inode, &iter->iomap);
if (iter->srcmap.type != IOMAP_HOLE)
trace_iomap_iter_srcmap(iter->inode, &iter->srcmap);
}
/**
* iomap_iter - iterate over a ranges in a file
* @iter: iteration structue
* @ops: iomap ops provided by the file system
*
* Iterate over filesystem-provided space mappings for the provided file range.
*
* This function handles cleanup of resources acquired for iteration when the
* filesystem indicates there are no more space mappings, which means that this
* function must be called in a loop that continues as long it returns a
* positive value. If 0 or a negative value is returned, the caller must not
* return to the loop body. Within a loop body, there are two ways to break out
* of the loop body: leave @iter.processed unchanged, or set it to a negative
* errno.
*/
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops)
{
int ret;
if (iter->iomap.length && ops->iomap_end) {
ret = ops->iomap_end(iter->inode, iter->pos, iomap_length(iter),
iter->processed > 0 ? iter->processed : 0,
iter->flags, &iter->iomap);
if (ret < 0 && !iter->processed)
return ret;
}
trace_iomap_iter(iter, ops, _RET_IP_);
ret = iomap_iter_advance(iter);
if (ret <= 0)
return ret;
ret = ops->iomap_begin(iter->inode, iter->pos, iter->len, iter->flags,
&iter->iomap, &iter->srcmap);
if (ret < 0)
return ret;
iomap_iter_done(iter);
return 1;
}
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2017 Red Hat, Inc.
* Copyright (c) 2018 Christoph Hellwig.
* Copyright (c) 2018-2021 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
......@@ -10,21 +10,20 @@
#include <linux/pagemap.h>
#include <linux/pagevec.h>
static loff_t
iomap_seek_hole_actor(struct inode *inode, loff_t start, loff_t length,
void *data, struct iomap *iomap, struct iomap *srcmap)
static loff_t iomap_seek_hole_iter(const struct iomap_iter *iter,
loff_t *hole_pos)
{
loff_t offset = start;
loff_t length = iomap_length(iter);
switch (iomap->type) {
switch (iter->iomap.type) {
case IOMAP_UNWRITTEN:
offset = mapping_seek_hole_data(inode->i_mapping, start,
start + length, SEEK_HOLE);
if (offset == start + length)
*hole_pos = mapping_seek_hole_data(iter->inode->i_mapping,
iter->pos, iter->pos + length, SEEK_HOLE);
if (*hole_pos == iter->pos + length)
return length;
fallthrough;
return 0;
case IOMAP_HOLE:
*(loff_t *)data = offset;
*hole_pos = iter->pos;
return 0;
default:
return length;
......@@ -32,70 +31,73 @@ iomap_seek_hole_actor(struct inode *inode, loff_t start, loff_t length,
}
loff_t
iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
iomap_seek_hole(struct inode *inode, loff_t pos, const struct iomap_ops *ops)
{
loff_t size = i_size_read(inode);
loff_t ret;
struct iomap_iter iter = {
.inode = inode,
.pos = pos,
.flags = IOMAP_REPORT,
};
int ret;
/* Nothing to be found before or beyond the end of the file. */
if (offset < 0 || offset >= size)
if (pos < 0 || pos >= size)
return -ENXIO;
while (offset < size) {
ret = iomap_apply(inode, offset, size - offset, IOMAP_REPORT,
ops, &offset, iomap_seek_hole_actor);
if (ret < 0)
return ret;
if (ret == 0)
break;
offset += ret;
}
return offset;
iter.len = size - pos;
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_seek_hole_iter(&iter, &pos);
if (ret < 0)
return ret;
if (iter.len) /* found hole before EOF */
return pos;
return size;
}
EXPORT_SYMBOL_GPL(iomap_seek_hole);
static loff_t
iomap_seek_data_actor(struct inode *inode, loff_t start, loff_t length,
void *data, struct iomap *iomap, struct iomap *srcmap)
static loff_t iomap_seek_data_iter(const struct iomap_iter *iter,
loff_t *hole_pos)
{
loff_t offset = start;
loff_t length = iomap_length(iter);
switch (iomap->type) {
switch (iter->iomap.type) {
case IOMAP_HOLE:
return length;
case IOMAP_UNWRITTEN:
offset = mapping_seek_hole_data(inode->i_mapping, start,
start + length, SEEK_DATA);
if (offset < 0)
*hole_pos = mapping_seek_hole_data(iter->inode->i_mapping,
iter->pos, iter->pos + length, SEEK_DATA);
if (*hole_pos < 0)
return length;
fallthrough;
return 0;
default:
*(loff_t *)data = offset;
*hole_pos = iter->pos;
return 0;
}
}
loff_t
iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
iomap_seek_data(struct inode *inode, loff_t pos, const struct iomap_ops *ops)
{
loff_t size = i_size_read(inode);
loff_t ret;
struct iomap_iter iter = {
.inode = inode,
.pos = pos,
.flags = IOMAP_REPORT,
};
int ret;
/* Nothing to be found before or beyond the end of the file. */
if (offset < 0 || offset >= size)
if (pos < 0 || pos >= size)
return -ENXIO;
while (offset < size) {
ret = iomap_apply(inode, offset, size - offset, IOMAP_REPORT,
ops, &offset, iomap_seek_data_actor);
if (ret < 0)
return ret;
if (ret == 0)
return offset;
offset += ret;
}
iter.len = size - pos;
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_seek_data_iter(&iter, &pos);
if (ret < 0)
return ret;
if (iter.len) /* found data before EOF */
return pos;
/* We've reached the end of the file without finding data */
return -ENXIO;
}
......
......@@ -31,11 +31,16 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
{
struct iomap *iomap = &isi->iomap;
unsigned long nr_pages;
unsigned long max_pages;
uint64_t first_ppage;
uint64_t first_ppage_reported;
uint64_t next_ppage;
int error;
if (unlikely(isi->nr_pages >= isi->sis->max))
return 0;
max_pages = isi->sis->max - isi->nr_pages;
/*
* Round the start up and the end down so that the physical
* extent aligns to a page boundary.
......@@ -48,6 +53,7 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
if (first_ppage >= next_ppage)
return 0;
nr_pages = next_ppage - first_ppage;
nr_pages = min(nr_pages, max_pages);
/*
* Calculate how much swap space we're adding; the first page contains
......@@ -88,13 +94,9 @@ static int iomap_swapfile_fail(struct iomap_swapfile_info *isi, const char *str)
* swap only cares about contiguous page-aligned physical extents and makes no
* distinction between written and unwritten extents.
*/
static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
loff_t count, void *data, struct iomap *iomap,
struct iomap *srcmap)
static loff_t iomap_swapfile_iter(const struct iomap_iter *iter,
struct iomap *iomap, struct iomap_swapfile_info *isi)
{
struct iomap_swapfile_info *isi = data;
int error;
switch (iomap->type) {
case IOMAP_MAPPED:
case IOMAP_UNWRITTEN:
......@@ -125,12 +127,12 @@ static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
isi->iomap.length += iomap->length;
} else {
/* Otherwise, add the retained iomap and store this one. */
error = iomap_swapfile_add_extent(isi);
int error = iomap_swapfile_add_extent(isi);
if (error)
return error;
memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
}
return count;
return iomap_length(iter);
}
/*
......@@ -141,16 +143,19 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
struct file *swap_file, sector_t *pagespan,
const struct iomap_ops *ops)
{
struct inode *inode = swap_file->f_mapping->host;
struct iomap_iter iter = {
.inode = inode,
.pos = 0,
.len = ALIGN_DOWN(i_size_read(inode), PAGE_SIZE),
.flags = IOMAP_REPORT,
};
struct iomap_swapfile_info isi = {
.sis = sis,
.lowest_ppage = (sector_t)-1ULL,
.file = swap_file,
};
struct address_space *mapping = swap_file->f_mapping;
struct inode *inode = mapping->host;
loff_t pos = 0;
loff_t len = ALIGN_DOWN(i_size_read(inode), PAGE_SIZE);
loff_t ret;
int ret;
/*
* Persist all file mapping metadata so that we won't have any
......@@ -160,15 +165,10 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
if (ret)
return ret;
while (len > 0) {
ret = iomap_apply(inode, pos, len, IOMAP_REPORT,
ops, &isi, iomap_swapfile_activate_actor);
if (ret <= 0)
return ret;
pos += ret;
len -= ret;
}
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_swapfile_iter(&iter, &iter.iomap, &isi);
if (ret < 0)
return ret;
if (isi.iomap.length) {
ret = iomap_swapfile_add_extent(&isi);
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2009-2019 Christoph Hellwig
* Copyright (c) 2009-2021 Christoph Hellwig
*
* NOTE: none of these tracepoints shall be consider a stable kernel ABI
* NOTE: none of these tracepoints shall be considered a stable kernel ABI
* as they can change at any time.
*
* Current conventions for printing numbers measuring specific units:
*
* offset: byte offset into a subcomponent of a file operation
* pos: file offset, in bytes
* length: length of a file operation, in bytes
* ino: inode number
*
* Numbers describing space allocations should be formatted in hexadecimal.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM iomap
......@@ -42,14 +51,14 @@ DEFINE_READPAGE_EVENT(iomap_readpage);
DEFINE_READPAGE_EVENT(iomap_readahead);
DECLARE_EVENT_CLASS(iomap_range_class,
TP_PROTO(struct inode *inode, unsigned long off, unsigned int len),
TP_PROTO(struct inode *inode, loff_t off, u64 len),
TP_ARGS(inode, off, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, ino)
__field(loff_t, size)
__field(unsigned long, offset)
__field(unsigned int, length)
__field(loff_t, offset)
__field(u64, length)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
......@@ -58,8 +67,7 @@ DECLARE_EVENT_CLASS(iomap_range_class,
__entry->offset = off;
__entry->length = len;
),
TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset %lx "
"length %x",
TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx length 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->size,
......@@ -69,7 +77,7 @@ DECLARE_EVENT_CLASS(iomap_range_class,
#define DEFINE_RANGE_EVENT(name) \
DEFINE_EVENT(iomap_range_class, name, \
TP_PROTO(struct inode *inode, unsigned long off, unsigned int len),\
TP_PROTO(struct inode *inode, loff_t off, u64 len),\
TP_ARGS(inode, off, len))
DEFINE_RANGE_EVENT(iomap_writepage);
DEFINE_RANGE_EVENT(iomap_releasepage);
......@@ -122,8 +130,8 @@ DECLARE_EVENT_CLASS(iomap_class,
__entry->flags = iomap->flags;
__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
),
TP_printk("dev %d:%d ino 0x%llx bdev %d:%d addr %lld offset %lld "
"length %llu type %s flags %s",
TP_printk("dev %d:%d ino 0x%llx bdev %d:%d addr 0x%llx offset 0x%llx "
"length 0x%llx type %s flags %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
MAJOR(__entry->bdev), MINOR(__entry->bdev),
......@@ -138,36 +146,32 @@ DECLARE_EVENT_CLASS(iomap_class,
DEFINE_EVENT(iomap_class, name, \
TP_PROTO(struct inode *inode, struct iomap *iomap), \
TP_ARGS(inode, iomap))
DEFINE_IOMAP_EVENT(iomap_apply_dstmap);
DEFINE_IOMAP_EVENT(iomap_apply_srcmap);
DEFINE_IOMAP_EVENT(iomap_iter_dstmap);
DEFINE_IOMAP_EVENT(iomap_iter_srcmap);
TRACE_EVENT(iomap_apply,
TP_PROTO(struct inode *inode, loff_t pos, loff_t length,
unsigned int flags, const void *ops, void *actor,
unsigned long caller),
TP_ARGS(inode, pos, length, flags, ops, actor, caller),
TRACE_EVENT(iomap_iter,
TP_PROTO(struct iomap_iter *iter, const void *ops,
unsigned long caller),
TP_ARGS(iter, ops, caller),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, ino)
__field(loff_t, pos)
__field(loff_t, length)
__field(u64, length)
__field(unsigned int, flags)
__field(const void *, ops)
__field(void *, actor)
__field(unsigned long, caller)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->length = length;
__entry->flags = flags;
__entry->dev = iter->inode->i_sb->s_dev;
__entry->ino = iter->inode->i_ino;
__entry->pos = iter->pos;
__entry->length = iomap_length(iter);
__entry->flags = iter->flags;
__entry->ops = ops;
__entry->actor = actor;
__entry->caller = caller;
),
TP_printk("dev %d:%d ino 0x%llx pos %lld length %lld flags %s (0x%x) "
"ops %ps caller %pS actor %ps",
TP_printk("dev %d:%d ino 0x%llx pos 0x%llx length 0x%llx flags %s (0x%x) ops %ps caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->pos,
......@@ -175,8 +179,7 @@ TRACE_EVENT(iomap_apply,
__print_flags(__entry->flags, "|", IOMAP_FLAGS_STRINGS),
__entry->flags,
__entry->ops,
(void *)__entry->caller,
__entry->actor)
(void *)__entry->caller)
);
#endif /* _IOMAP_TRACE_H */
......
......@@ -91,12 +91,29 @@ struct iomap {
const struct iomap_page_ops *page_ops;
};
static inline sector_t
iomap_sector(struct iomap *iomap, loff_t pos)
static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
{
return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
}
/*
* Returns the inline data pointer for logical offset @pos.
*/
static inline void *iomap_inline_data(const struct iomap *iomap, loff_t pos)
{
return iomap->inline_data + pos - iomap->offset;
}
/*
* Check if the mapping's length is within the valid range for inline data.
* This is used to guard against accessing data beyond the page inline_data
* points at.
*/
static inline bool iomap_inline_data_valid(const struct iomap *iomap)
{
return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
}
/*
* When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
* and page_done will be called for each page written to. This only applies to
......@@ -108,10 +125,9 @@ iomap_sector(struct iomap *iomap, loff_t pos)
* associated page could not be obtained.
*/
struct iomap_page_ops {
int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len,
struct iomap *iomap);
int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
struct page *page, struct iomap *iomap);
struct page *page);
};
/*
......@@ -124,6 +140,7 @@ struct iomap_page_ops {
#define IOMAP_DIRECT (1 << 4) /* direct I/O */
#define IOMAP_NOWAIT (1 << 5) /* do not block */
#define IOMAP_OVERWRITE_ONLY (1 << 6) /* only pure overwrites allowed */
#define IOMAP_UNSHARE (1 << 7) /* unshare_file_range */
struct iomap_ops {
/*
......@@ -145,15 +162,61 @@ struct iomap_ops {
ssize_t written, unsigned flags, struct iomap *iomap);
};
/*
* Main iomap iterator function.
/**
* struct iomap_iter - Iterate through a range of a file
* @inode: Set at the start of the iteration and should not change.
* @pos: The current file position we are operating on. It is updated by
* calls to iomap_iter(). Treat as read-only in the body.
* @len: The remaining length of the file segment we're operating on.
* It is updated at the same time as @pos.
* @processed: The number of bytes processed by the body in the most recent
* iteration, or a negative errno. 0 causes the iteration to stop.
* @flags: Zero or more of the iomap_begin flags above.
* @iomap: Map describing the I/O iteration
* @srcmap: Source map for COW operations
*/
typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
void *data, struct iomap *iomap, struct iomap *srcmap);
struct iomap_iter {
struct inode *inode;
loff_t pos;
u64 len;
s64 processed;
unsigned flags;
struct iomap iomap;
struct iomap srcmap;
};
loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, const struct iomap_ops *ops, void *data,
iomap_actor_t actor);
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
/**
* iomap_length - length of the current iomap iteration
* @iter: iteration structure
*
* Returns the length that the operation applies to for the current iteration.
*/
static inline u64 iomap_length(const struct iomap_iter *iter)
{
u64 end = iter->iomap.offset + iter->iomap.length;
if (iter->srcmap.type != IOMAP_HOLE)
end = min(end, iter->srcmap.offset + iter->srcmap.length);
return min(iter->len, end - iter->pos);
}
/**
* iomap_iter_srcmap - return the source map for the current iomap iteration
* @i: iteration structure
*
* Write operations on file systems with reflink support might require a
* source and a destination map. This function retourns the source map
* for a given operation, which may or may no be identical to the destination
* map in &i->iomap.
*/
static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
{
if (i->srcmap.type != IOMAP_HOLE)
return &i->srcmap;
return &i->iomap;
}
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops);
......@@ -250,8 +313,8 @@ int iomap_writepages(struct address_space *mapping,
struct iomap_dio_ops {
int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
unsigned flags);
blk_qc_t (*submit_io)(struct inode *inode, struct iomap *iomap,
struct bio *bio, loff_t file_offset);
blk_qc_t (*submit_io)(const struct iomap_iter *iter, struct bio *bio,
loff_t file_offset);
};
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment