Commit c364b6d0 authored by Darrick J. Wong

xfs: fix bmv_count confusion w/ shared extents

In a bmapx call, bmv_count is the total size of the array, including the
zeroth element that userspace uses to supply the search key.  The output
array starts at offset 1 so that we can set up the user for the next
invocation.  Since we now can split an extent into multiple bmap records
due to shared/unshared status, we have to be careful that we don't
overflow the output array.

In the original patch f86f4037 ("xfs: teach get_bmapx about shared
extents and the CoW fork") I used cur_ext (the output index) to check
for overflows, albeit with an off-by-one error.  Since nexleft no longer
describes the number of unfilled slots in the output, we can rip all
that out and use cur_ext for the overflow check directly.

Failure to do this causes heap corruption in bmapx callers such as
xfs_io and xfs_scrub.  xfs/328 can reproduce this problem.
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
parent 2aa6ba7b
...@@ -528,7 +528,6 @@ xfs_getbmap( ...@@ -528,7 +528,6 @@ xfs_getbmap(
xfs_bmbt_irec_t *map; /* buffer for user's data */ xfs_bmbt_irec_t *map; /* buffer for user's data */
xfs_mount_t *mp; /* file system mount point */ xfs_mount_t *mp; /* file system mount point */
int nex; /* # of user extents can do */ int nex; /* # of user extents can do */
int nexleft; /* # of user extents left */
int subnex; /* # of bmapi's can do */ int subnex; /* # of bmapi's can do */
int nmap; /* number of map entries */ int nmap; /* number of map entries */
struct getbmapx *out; /* output structure */ struct getbmapx *out; /* output structure */
...@@ -686,10 +685,8 @@ xfs_getbmap( ...@@ -686,10 +685,8 @@ xfs_getbmap(
goto out_free_map; goto out_free_map;
} }
nexleft = nex;
do { do {
nmap = (nexleft > subnex) ? subnex : nexleft; nmap = (nex> subnex) ? subnex : nex;
error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
XFS_BB_TO_FSB(mp, bmv->bmv_length), XFS_BB_TO_FSB(mp, bmv->bmv_length),
map, &nmap, bmapi_flags); map, &nmap, bmapi_flags);
...@@ -697,8 +694,8 @@ xfs_getbmap( ...@@ -697,8 +694,8 @@ xfs_getbmap(
goto out_free_map; goto out_free_map;
ASSERT(nmap <= subnex); ASSERT(nmap <= subnex);
for (i = 0; i < nmap && nexleft && bmv->bmv_length && for (i = 0; i < nmap && bmv->bmv_length &&
cur_ext < bmv->bmv_count; i++) { cur_ext < bmv->bmv_count - 1; i++) {
out[cur_ext].bmv_oflags = 0; out[cur_ext].bmv_oflags = 0;
if (map[i].br_state == XFS_EXT_UNWRITTEN) if (map[i].br_state == XFS_EXT_UNWRITTEN)
out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
...@@ -760,16 +757,27 @@ xfs_getbmap( ...@@ -760,16 +757,27 @@ xfs_getbmap(
continue; continue;
} }
/*
* In order to report shared extents accurately,
* we report each distinct shared/unshared part
* of a single bmbt record using multiple bmap
* extents. To make that happen, we iterate the
* same map array item multiple times, each
* time trimming out the subextent that we just
* reported.
*
* Because of this, we must check the out array
* index (cur_ext) directly against bmv_count-1
* to avoid overflows.
*/
if (inject_map.br_startblock != NULLFSBLOCK) { if (inject_map.br_startblock != NULLFSBLOCK) {
map[i] = inject_map; map[i] = inject_map;
i--; i--;
} else }
nexleft--;
bmv->bmv_entries++; bmv->bmv_entries++;
cur_ext++; cur_ext++;
} }
} while (nmap && nexleft && bmv->bmv_length && } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
cur_ext < bmv->bmv_count);
out_free_map: out_free_map:
kmem_free(map); kmem_free(map);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment