Commit 26473f83 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'iomap-5.3-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull iomap split/cleanup from Darrick Wong:
 "As promised, here's the second part of the iomap merge for 5.3, in
  which we break up iomap.c into smaller files grouped by functional
  area so that it'll be easier in the long run to maintain cohesiveness
  of code units and to review incoming patches. There are no functional
  changes and fs/iomap.c split cleanly.

  Summary:

   - Regroup the fs/iomap.c code by major functional area so that we can
     start development for 5.4 from a more stable base"

* tag 'iomap-5.3-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  iomap: move internal declarations into fs/iomap/
  iomap: move the main iteration code into a separate file
  iomap: move the buffered IO code into a separate file
  iomap: move the direct IO code into a separate file
  iomap: move the SEEK_HOLE code into a separate file
  iomap: move the file mapping reporting code into a separate file
  iomap: move the swapfile code into a separate file
  iomap: start moving code to fs/iomap/
parents 4f5ed131 5d907307
......@@ -8415,6 +8415,7 @@ L: linux-fsdevel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
S: Supported
F: fs/iomap.c
F: fs/iomap/
F: include/linux/iomap.h
IOMMU DRIVERS
......
......@@ -52,7 +52,7 @@ obj-$(CONFIG_COREDUMP) += coredump.o
obj-$(CONFIG_SYSCTL) += drop_caches.o
obj-$(CONFIG_FHANDLE) += fhandle.o
obj-$(CONFIG_FS_IOMAP) += iomap.o
obj-y += iomap/
obj-y += quota/
......
......@@ -26,7 +26,6 @@
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
#include <asm/pgalloc.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>
......
......@@ -185,15 +185,5 @@ extern const struct dentry_operations ns_dentry_operations;
extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
unsigned long arg);
/*
* iomap support:
*/
typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
void *data, struct iomap *iomap);
loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, const struct iomap_ops *ops, void *data,
iomap_actor_t actor);
/* direct-io.c: */
int sb_init_dio_done_wq(struct super_block *sb);
# SPDX-License-Identifier: GPL-2.0-or-newer
#
# Copyright (c) 2019 Oracle.
# All Rights Reserved.
#
obj-$(CONFIG_FS_IOMAP) += iomap.o
iomap-y += \
apply.o \
buffered-io.o \
direct-io.o \
fiemap.o \
seek.o
iomap-$(CONFIG_SWAP) += swapfile.o
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010 Red Hat, Inc.
* Copyright (c) 2016-2018 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/iomap.h>
/*
* Execute a iomap write on a segment of the mapping that spans a
* contiguous range of pages that have identical block mapping state.
*
* This avoids the need to map pages individually, do individual allocations
* for each page and most importantly avoid the need for filesystem specific
* locking per page. Instead, all the operations are amortised over the entire
* range of pages. It is assumed that the filesystems will lock whatever
* resources they require in the iomap_begin call, and release them in the
* iomap_end call.
*/
loff_t
iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
const struct iomap_ops *ops, void *data, iomap_actor_t actor)
{
struct iomap iomap = { 0 };
loff_t written = 0, ret;
/*
* Need to map a range from start position for length bytes. This can
* span multiple pages - it is only guaranteed to return a range of a
* single type of pages (e.g. all into a hole, all mapped or all
* unwritten). Failure at this point has nothing to undo.
*
* If allocation is required for this range, reserve the space now so
* that the allocation is guaranteed to succeed later on. Once we copy
* the data into the page cache pages, then we cannot fail otherwise we
* expose transient stale data. If the reserve fails, we can safely
* back out at this point as there is nothing to undo.
*/
ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
if (ret)
return ret;
if (WARN_ON(iomap.offset > pos))
return -EIO;
if (WARN_ON(iomap.length == 0))
return -EIO;
/*
* Cut down the length to the one actually provided by the filesystem,
* as it might not be able to give us the whole size that we requested.
*/
if (iomap.offset + iomap.length < pos + length)
length = iomap.offset + iomap.length - pos;
/*
* Now that we have guaranteed that the space allocation will succeed.
* we can do the copy-in page by page without having to worry about
* failures exposing transient data.
*/
written = actor(inode, pos, length, data, &iomap);
/*
* Now the data has been copied, commit the range we've copied. This
* should not fail unless the filesystem has had a fatal error.
*/
if (ops->iomap_end) {
ret = ops->iomap_end(inode, pos, length,
written > 0 ? written : 0,
flags, &iomap);
}
return written ? written : ret;
}
This diff is collapsed.
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016-2018 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/iomap.h>
struct fiemap_ctx {
struct fiemap_extent_info *fi;
struct iomap prev;
};
static int iomap_to_fiemap(struct fiemap_extent_info *fi,
struct iomap *iomap, u32 flags)
{
switch (iomap->type) {
case IOMAP_HOLE:
/* skip holes */
return 0;
case IOMAP_DELALLOC:
flags |= FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN;
break;
case IOMAP_MAPPED:
break;
case IOMAP_UNWRITTEN:
flags |= FIEMAP_EXTENT_UNWRITTEN;
break;
case IOMAP_INLINE:
flags |= FIEMAP_EXTENT_DATA_INLINE;
break;
}
if (iomap->flags & IOMAP_F_MERGED)
flags |= FIEMAP_EXTENT_MERGED;
if (iomap->flags & IOMAP_F_SHARED)
flags |= FIEMAP_EXTENT_SHARED;
return fiemap_fill_next_extent(fi, iomap->offset,
iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0,
iomap->length, flags);
}
static loff_t
iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct iomap *iomap)
{
struct fiemap_ctx *ctx = data;
loff_t ret = length;
if (iomap->type == IOMAP_HOLE)
return length;
ret = iomap_to_fiemap(ctx->fi, &ctx->prev, 0);
ctx->prev = *iomap;
switch (ret) {
case 0: /* success */
return length;
case 1: /* extent array full */
return 0;
default:
return ret;
}
}
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
loff_t start, loff_t len, const struct iomap_ops *ops)
{
struct fiemap_ctx ctx;
loff_t ret;
memset(&ctx, 0, sizeof(ctx));
ctx.fi = fi;
ctx.prev.type = IOMAP_HOLE;
ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC);
if (ret)
return ret;
if (fi->fi_flags & FIEMAP_FLAG_SYNC) {
ret = filemap_write_and_wait(inode->i_mapping);
if (ret)
return ret;
}
while (len > 0) {
ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx,
iomap_fiemap_actor);
/* inode with no (attribute) mapping will give ENOENT */
if (ret == -ENOENT)
break;
if (ret < 0)
return ret;
if (ret == 0)
break;
start += ret;
len -= ret;
}
if (ctx.prev.type != IOMAP_HOLE) {
ret = iomap_to_fiemap(fi, &ctx.prev, FIEMAP_EXTENT_LAST);
if (ret < 0)
return ret;
}
return 0;
}
EXPORT_SYMBOL_GPL(iomap_fiemap);
static loff_t
iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
void *data, struct iomap *iomap)
{
sector_t *bno = data, addr;
if (iomap->type == IOMAP_MAPPED) {
addr = (pos - iomap->offset + iomap->addr) >> inode->i_blkbits;
if (addr > INT_MAX)
WARN(1, "would truncate bmap result\n");
else
*bno = addr;
}
return 0;
}
/* legacy ->bmap interface. 0 is the error return (!) */
sector_t
iomap_bmap(struct address_space *mapping, sector_t bno,
const struct iomap_ops *ops)
{
struct inode *inode = mapping->host;
loff_t pos = bno << inode->i_blkbits;
unsigned blocksize = i_blocksize(inode);
if (filemap_write_and_wait(mapping))
return 0;
bno = 0;
iomap_apply(inode, pos, blocksize, 0, ops, &bno, iomap_bmap_actor);
return bno;
}
EXPORT_SYMBOL_GPL(iomap_bmap);
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2017 Red Hat, Inc.
* Copyright (c) 2018 Christoph Hellwig.
*/
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
/*
* Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
* Returns true if found and updates @lastoff to the offset in file.
*/
static bool
page_seek_hole_data(struct inode *inode, struct page *page, loff_t *lastoff,
int whence)
{
const struct address_space_operations *ops = inode->i_mapping->a_ops;
unsigned int bsize = i_blocksize(inode), off;
bool seek_data = whence == SEEK_DATA;
loff_t poff = page_offset(page);
if (WARN_ON_ONCE(*lastoff >= poff + PAGE_SIZE))
return false;
if (*lastoff < poff) {
/*
* Last offset smaller than the start of the page means we found
* a hole:
*/
if (whence == SEEK_HOLE)
return true;
*lastoff = poff;
}
/*
* Just check the page unless we can and should check block ranges:
*/
if (bsize == PAGE_SIZE || !ops->is_partially_uptodate)
return PageUptodate(page) == seek_data;
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping))
goto out_unlock_not_found;
for (off = 0; off < PAGE_SIZE; off += bsize) {
if (offset_in_page(*lastoff) >= off + bsize)
continue;
if (ops->is_partially_uptodate(page, off, bsize) == seek_data) {
unlock_page(page);
return true;
}
*lastoff = poff + off + bsize;
}
out_unlock_not_found:
unlock_page(page);
return false;
}
/*
* Seek for SEEK_DATA / SEEK_HOLE in the page cache.
*
* Within unwritten extents, the page cache determines which parts are holes
* and which are data: uptodate buffer heads count as data; everything else
* counts as a hole.
*
* Returns the resulting offset on successs, and -ENOENT otherwise.
*/
static loff_t
page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
int whence)
{
pgoff_t index = offset >> PAGE_SHIFT;
pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
loff_t lastoff = offset;
struct pagevec pvec;
if (length <= 0)
return -ENOENT;
pagevec_init(&pvec);
do {
unsigned nr_pages, i;
nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &index,
end - 1);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
if (page_seek_hole_data(inode, page, &lastoff, whence))
goto check_range;
lastoff = page_offset(page) + PAGE_SIZE;
}
pagevec_release(&pvec);
} while (index < end);
/* When no page at lastoff and we are not done, we found a hole. */
if (whence != SEEK_HOLE)
goto not_found;
check_range:
if (lastoff < offset + length)
goto out;
not_found:
lastoff = -ENOENT;
out:
pagevec_release(&pvec);
return lastoff;
}
static loff_t
iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
void *data, struct iomap *iomap)
{
switch (iomap->type) {
case IOMAP_UNWRITTEN:
offset = page_cache_seek_hole_data(inode, offset, length,
SEEK_HOLE);
if (offset < 0)
return length;
/* fall through */
case IOMAP_HOLE:
*(loff_t *)data = offset;
return 0;
default:
return length;
}
}
loff_t
iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
{
loff_t size = i_size_read(inode);
loff_t length = size - offset;
loff_t ret;
/* Nothing to be found before or beyond the end of the file. */
if (offset < 0 || offset >= size)
return -ENXIO;
while (length > 0) {
ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops,
&offset, iomap_seek_hole_actor);
if (ret < 0)
return ret;
if (ret == 0)
break;
offset += ret;
length -= ret;
}
return offset;
}
EXPORT_SYMBOL_GPL(iomap_seek_hole);
static loff_t
iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
void *data, struct iomap *iomap)
{
switch (iomap->type) {
case IOMAP_HOLE:
return length;
case IOMAP_UNWRITTEN:
offset = page_cache_seek_hole_data(inode, offset, length,
SEEK_DATA);
if (offset < 0)
return length;
/*FALLTHRU*/
default:
*(loff_t *)data = offset;
return 0;
}
}
loff_t
iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
{
loff_t size = i_size_read(inode);
loff_t length = size - offset;
loff_t ret;
/* Nothing to be found before or beyond the end of the file. */
if (offset < 0 || offset >= size)
return -ENXIO;
while (length > 0) {
ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops,
&offset, iomap_seek_data_actor);
if (ret < 0)
return ret;
if (ret == 0)
break;
offset += ret;
length -= ret;
}
if (length <= 0)
return -ENXIO;
return offset;
}
EXPORT_SYMBOL_GPL(iomap_seek_data);
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2018 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/swap.h>
/* Swapfile activation */
struct iomap_swapfile_info {
struct iomap iomap; /* accumulated iomap */
struct swap_info_struct *sis;
uint64_t lowest_ppage; /* lowest physical addr seen (pages) */
uint64_t highest_ppage; /* highest physical addr seen (pages) */
unsigned long nr_pages; /* number of pages collected */
int nr_extents; /* extent count */
};
/*
* Collect physical extents for this swap file. Physical extents reported to
* the swap code must be trimmed to align to a page boundary. The logical
* offset within the file is irrelevant since the swapfile code maps logical
* page numbers of the swap device to the physical page-aligned extents.
*/
static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
{
struct iomap *iomap = &isi->iomap;
unsigned long nr_pages;
uint64_t first_ppage;
uint64_t first_ppage_reported;
uint64_t next_ppage;
int error;
/*
* Round the start up and the end down so that the physical
* extent aligns to a page boundary.
*/
first_ppage = ALIGN(iomap->addr, PAGE_SIZE) >> PAGE_SHIFT;
next_ppage = ALIGN_DOWN(iomap->addr + iomap->length, PAGE_SIZE) >>
PAGE_SHIFT;
/* Skip too-short physical extents. */
if (first_ppage >= next_ppage)
return 0;
nr_pages = next_ppage - first_ppage;
/*
* Calculate how much swap space we're adding; the first page contains
* the swap header and doesn't count. The mm still wants that first
* page fed to add_swap_extent, however.
*/
first_ppage_reported = first_ppage;
if (iomap->offset == 0)
first_ppage_reported++;
if (isi->lowest_ppage > first_ppage_reported)
isi->lowest_ppage = first_ppage_reported;
if (isi->highest_ppage < (next_ppage - 1))
isi->highest_ppage = next_ppage - 1;
/* Add extent, set up for the next call. */
error = add_swap_extent(isi->sis, isi->nr_pages, nr_pages, first_ppage);
if (error < 0)
return error;
isi->nr_extents += error;
isi->nr_pages += nr_pages;
return 0;
}
/*
* Accumulate iomaps for this swap file. We have to accumulate iomaps because
* swap only cares about contiguous page-aligned physical extents and makes no
* distinction between written and unwritten extents.
*/
static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
loff_t count, void *data, struct iomap *iomap)
{
struct iomap_swapfile_info *isi = data;
int error;
switch (iomap->type) {
case IOMAP_MAPPED:
case IOMAP_UNWRITTEN:
/* Only real or unwritten extents. */
break;
case IOMAP_INLINE:
/* No inline data. */
pr_err("swapon: file is inline\n");
return -EINVAL;
default:
pr_err("swapon: file has unallocated extents\n");
return -EINVAL;
}
/* No uncommitted metadata or shared blocks. */
if (iomap->flags & IOMAP_F_DIRTY) {
pr_err("swapon: file is not committed\n");
return -EINVAL;
}
if (iomap->flags & IOMAP_F_SHARED) {
pr_err("swapon: file has shared extents\n");
return -EINVAL;
}
/* Only one bdev per swap file. */
if (iomap->bdev != isi->sis->bdev) {
pr_err("swapon: file is on multiple devices\n");
return -EINVAL;
}
if (isi->iomap.length == 0) {
/* No accumulated extent, so just store it. */
memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
} else if (isi->iomap.addr + isi->iomap.length == iomap->addr) {
/* Append this to the accumulated extent. */
isi->iomap.length += iomap->length;
} else {
/* Otherwise, add the retained iomap and store this one. */
error = iomap_swapfile_add_extent(isi);
if (error)
return error;
memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
}
return count;
}
/*
* Iterate a swap file's iomaps to construct physical extents that can be
* passed to the swapfile subsystem.
*/
int iomap_swapfile_activate(struct swap_info_struct *sis,
struct file *swap_file, sector_t *pagespan,
const struct iomap_ops *ops)
{
struct iomap_swapfile_info isi = {
.sis = sis,
.lowest_ppage = (sector_t)-1ULL,
};
struct address_space *mapping = swap_file->f_mapping;
struct inode *inode = mapping->host;
loff_t pos = 0;
loff_t len = ALIGN_DOWN(i_size_read(inode), PAGE_SIZE);
loff_t ret;
/*
* Persist all file mapping metadata so that we won't have any
* IOMAP_F_DIRTY iomaps.
*/
ret = vfs_fsync(swap_file, 1);
if (ret)
return ret;
while (len > 0) {
ret = iomap_apply(inode, pos, len, IOMAP_REPORT,
ops, &isi, iomap_swapfile_activate_actor);
if (ret <= 0)
return ret;
pos += ret;
len -= ret;
}
if (isi.iomap.length) {
ret = iomap_swapfile_add_extent(&isi);
if (ret)
return ret;
}
*pagespan = 1 + isi.highest_ppage - isi.lowest_ppage;
sis->max = isi.nr_pages;
sis->pages = isi.nr_pages - 1;
sis->highest_bit = isi.nr_pages - 1;
return isi.nr_extents;
}
EXPORT_SYMBOL_GPL(iomap_swapfile_activate);
......@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/mm_types.h>
#include <linux/blkdev.h>
struct address_space;
struct fiemap_extent_info;
......@@ -69,6 +70,12 @@ struct iomap {
const struct iomap_page_ops *page_ops;
};
static inline sector_t
iomap_sector(struct iomap *iomap, loff_t pos)
{
return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
}
/*
* When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
* and page_done will be called for each page written to. This only applies to
......@@ -115,6 +122,16 @@ struct iomap_ops {
ssize_t written, unsigned flags, struct iomap *iomap);
};
/*
* Main iomap iterator function.
*/
typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
void *data, struct iomap *iomap);
loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, const struct iomap_ops *ops, void *data,
iomap_actor_t actor);
/*
* Structure allocate for each page when block size < PAGE_SIZE to track
* sub-page uptodate status and I/O completions.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment