Commit dfe51d47, authored by Amir Goldstein and committed by Miklos Szeredi

ovl: avoid possible inode number collisions with xino=on

When xino feature is enabled and a real directory inode number overflows
the lower xino bits, we cannot map this directory inode number to a unique
and persistent inode number and we fall back to the real inode st_ino and
overlay st_dev.

The real inode st_ino with high bits may collide with a lower inode number
on overlay st_dev that was mapped using xino.

To avoid possible collision with legitimate xino values, map a non
persistent inode number to a dedicated range in the xino address space.
The dedicated range is created by adding one more bit to the number of
reserved high xino bits.  We could have added just one more fsid, but that
would have had the undesired effect of changing persistent overlay inode
numbers on kernel upgrade, or would have required more complex xino mapping code.
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
parent 4d314f78
...@@ -79,6 +79,7 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) ...@@ -79,6 +79,7 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
{ {
bool samefs = ovl_same_fs(dentry->d_sb); bool samefs = ovl_same_fs(dentry->d_sb);
unsigned int xinobits = ovl_xino_bits(dentry->d_sb); unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
unsigned int xinoshift = 64 - xinobits;
if (samefs) { if (samefs) {
/* /*
...@@ -89,20 +90,20 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) ...@@ -89,20 +90,20 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
stat->dev = dentry->d_sb->s_dev; stat->dev = dentry->d_sb->s_dev;
return 0; return 0;
} else if (xinobits) { } else if (xinobits) {
unsigned int shift = 64 - xinobits;
/* /*
* All inode numbers of underlying fs should not be using the * All inode numbers of underlying fs should not be using the
* high xinobits, so we use high xinobits to partition the * high xinobits, so we use high xinobits to partition the
* overlay st_ino address space. The high bits holds the fsid * overlay st_ino address space. The high bits holds the fsid
* (upper fsid is 0). This way overlay inode numbers are unique * (upper fsid is 0). The lowest xinobit is reserved for mapping
* and all inodes use overlay st_dev. Inode numbers are also * the non-persistent inode numbers range in case of overflow.
* persistent for a given layer configuration. * This way all overlay inode numbers are unique and use the
* overlay st_dev.
*/ */
if (stat->ino >> shift) { if (unlikely(stat->ino >> xinoshift)) {
pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
dentry, stat->ino, xinobits); dentry, stat->ino, xinobits);
} else { } else {
stat->ino |= ((u64)fsid) << shift; stat->ino |= ((u64)fsid) << (xinoshift + 1);
stat->dev = dentry->d_sb->s_dev; stat->dev = dentry->d_sb->s_dev;
return 0; return 0;
} }
...@@ -573,6 +574,7 @@ static void ovl_next_ino(struct inode *inode) ...@@ -573,6 +574,7 @@ static void ovl_next_ino(struct inode *inode)
static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid) static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid)
{ {
int xinobits = ovl_xino_bits(inode->i_sb); int xinobits = ovl_xino_bits(inode->i_sb);
unsigned int xinoshift = 64 - xinobits;
/* /*
* When d_ino is consistent with st_ino (samefs or i_ino has enough * When d_ino is consistent with st_ino (samefs or i_ino has enough
...@@ -582,11 +584,28 @@ static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid) ...@@ -582,11 +584,28 @@ static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid)
* with d_ino also causes nfsd readdirplus to fail. * with d_ino also causes nfsd readdirplus to fail.
*/ */
inode->i_ino = ino; inode->i_ino = ino;
if (ovl_same_dev(inode->i_sb)) { if (ovl_same_fs(inode->i_sb)) {
if (xinobits && fsid && !(ino >> (64 - xinobits))) return;
inode->i_ino |= (unsigned long)fsid << (64 - xinobits); } else if (xinobits && likely(!(ino >> xinoshift))) {
} else if (S_ISDIR(inode->i_mode)) { inode->i_ino |= (unsigned long)fsid << (xinoshift + 1);
return;
}
/*
* For directory inodes on non-samefs with xino disabled or xino
* overflow, we allocate a non-persistent inode number, to be used for
* resolving st_ino collisions in ovl_map_dev_ino().
*
* To avoid ino collision with legitimate xino values from upper
* layer (fsid 0), use the lowest xinobit to map the non
* persistent inode numbers to the unified st_ino address space.
*/
if (S_ISDIR(inode->i_mode)) {
ovl_next_ino(inode); ovl_next_ino(inode);
if (xinobits) {
inode->i_ino &= ~0UL >> xinobits;
inode->i_ino |= 1UL << xinoshift;
}
} }
} }
......
...@@ -440,13 +440,19 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) ...@@ -440,13 +440,19 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
const char *name, int namelen) const char *name, int namelen)
{ {
if (ino >> (64 - xinobits)) { unsigned int xinoshift = 64 - xinobits;
if (unlikely(ino >> xinoshift)) {
pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
namelen, name, ino, xinobits); namelen, name, ino, xinobits);
return ino; return ino;
} }
return ino | ((u64)fsid) << (64 - xinobits); /*
* The lowest xinobit is reserved for mapping the non-persistent inode
* numbers range, but this range is only exposed via st_ino, not here.
*/
return ino | ((u64)fsid) << (xinoshift + 1);
} }
/* /*
......
...@@ -1483,7 +1483,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, ...@@ -1483,7 +1483,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
* free high bits in underlying fs to hold the unique fsid. * free high bits in underlying fs to hold the unique fsid.
* If overlayfs does encounter underlying inodes using the high xino * If overlayfs does encounter underlying inodes using the high xino
* bits reserved for fsid, it emits a warning and uses the original * bits reserved for fsid, it emits a warning and uses the original
* inode number. * inode number or a non persistent inode number allocated from a
* dedicated range.
*/ */
if (ofs->numfs - !ofs->upper_mnt == 1) { if (ofs->numfs - !ofs->upper_mnt == 1) {
if (ofs->config.xino == OVL_XINO_ON) if (ofs->config.xino == OVL_XINO_ON)
...@@ -1494,11 +1495,13 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, ...@@ -1494,11 +1495,13 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
} else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) { } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) {
/* /*
* This is a roundup of number of bits needed for encoding * This is a roundup of number of bits needed for encoding
* fsid, where fsid 0 is reserved for upper fs even with * fsid, where fsid 0 is reserved for upper fs (even with
* lower only overlay. * lower only overlay) +1 extra bit is reserved for the non
* persistent inode number range that is used for resolving
* xino lower bits overflow.
*/ */
BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31); BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
ofs->xino_mode = ilog2(ofs->numfs - 1) + 1; ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
} }
if (ofs->xino_mode > 0) { if (ofs->xino_mode > 0) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment