Commit 77eea05e authored by Boris Burkov, committed by David Sterba

btrfs: add ro compat flags to inodes

Currently, inode flags are fully backwards incompatible in btrfs. If we
introduce a new inode flag, then tree-checker will detect it and fail.
This can even cause us to fail to mount entirely. To make it possible to
introduce new flags which can be read-only compatible, like VERITY, we
add new ro flags to btrfs without treating them quite so harshly in
tree-checker. A read-only file system can survive an unexpected flag,
and can be mounted.

As for the implementation, it unfortunately gets a little complicated.

The on-disk representation of the inode, btrfs_inode_item, has an __le64
for flags but the in-memory representation, btrfs_inode, uses a u32.
David Sterba had the nice idea that we could reclaim those wasted 32 bits
on disk and use them for the new ro_compat flags.

It turns out that the tree-checker code which checks for unknown flags
is broken, and ignores the upper 32 bits we are hoping to use. The issue
is that the flags use the literal 1 rather than 1ULL, so the flags are
signed ints, and one of them is specifically (1 << 31). As a result, the
mask which ORs the flags is a negative integer on machines where int is
32 bit twos complement. When tree-checker evaluates the expression:

  btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK

The mask is something like 0x80000abc, which gets promoted to u64 with
sign extension to 0xffffffff80000abc. Complementing that 64 bit mask with ~
leaves all the upper bits zeroed, so we can't detect unexpected flags there.

This suggests that we can't use those bits after all. Luckily, we have
good reason to believe that they are zero anyway. Inode flags are
metadata, which is always checksummed, so any bit flips that would
introduce 1s would cause a checksum failure anyway (excluding the
improbable case of the checksum being corrupted in exactly the matching way).

Further, unless the 1 << 31 flag is used, the cast to u64 of the 32 bit
inode flag should preserve its value and not add leading ones
(at least for two's complement). The only place that flag
(BTRFS_INODE_ROOT_ITEM_INIT) is used is in a special inode embedded in
the root item, and indeed for that inode we see 0xffffffff80000000 as
the flags on disk. However, that inode is never seen by tree checker,
nor is it used in a context where verity might be meaningful.
Theoretically, a future ro flag might cause trouble on that inode, so we
should proactively clean up that mess before it does.

With the introduction of the new ro flags, keep two separate unsigned
masks and check them against the appropriate u32. Since we no longer run
afoul of sign extension, this also stops writing out 0xffffffff80000000
in root_item inodes going forward.
Signed-off-by: Boris Burkov <boris@bur.io>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent efc222f8
...@@ -189,8 +189,10 @@ struct btrfs_inode { ...@@ -189,8 +189,10 @@ struct btrfs_inode {
*/ */
u64 csum_bytes; u64 csum_bytes;
/* flags field from the on disk inode */ /* Backwards incompatible flags, lower half of inode_item::flags */
u32 flags; u32 flags;
/* Read-only compatibility flags, upper half of inode_item::flags */
u32 ro_flags;
/* /*
* Counters to keep track of the number of extent item's we may use due * Counters to keep track of the number of extent item's we may use due
...@@ -348,6 +350,22 @@ struct btrfs_dio_private { ...@@ -348,6 +350,22 @@ struct btrfs_dio_private {
u8 csums[]; u8 csums[];
}; };
/*
* btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
* separate u32s. These two functions convert between the two representations.
*/
/*
 * Pack the two 32-bit flag words into a single 64-bit value: @flags fills
 * the lower 32 bits and @ro_flags fills the upper 32 bits.
 */
static inline u64 btrfs_inode_combine_flags(u32 flags, u32 ro_flags)
{
	/* Widen before shifting so the ro flags land in bits 32..63. */
	u64 upper_half = (u64)ro_flags << 32;

	return upper_half | flags;
}
/*
 * Unpack a 64-bit flags value into its two 32-bit halves: the lower 32 bits
 * are stored through @flags and the upper 32 bits through @ro_flags.
 */
static inline void btrfs_inode_split_flags(u64 inode_item_flags,
					   u32 *flags, u32 *ro_flags)
{
	const u64 lower_half = inode_item_flags & 0xffffffffULL;
	const u64 upper_half = inode_item_flags >> 32;

	/* Store order (lower half first) is kept deliberately. */
	*flags = (u32)lower_half;
	*ro_flags = (u32)upper_half;
}
/* Array of bytes with variable length, hexadecimal format 0x1234 */ /* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN" #define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes #define CSUM_FMT_VALUE(size, bytes) size, bytes
......
...@@ -1482,20 +1482,20 @@ do { \ ...@@ -1482,20 +1482,20 @@ do { \
/* /*
* Inode flags * Inode flags
*/ */
#define BTRFS_INODE_NODATASUM (1 << 0) #define BTRFS_INODE_NODATASUM (1U << 0)
#define BTRFS_INODE_NODATACOW (1 << 1) #define BTRFS_INODE_NODATACOW (1U << 1)
#define BTRFS_INODE_READONLY (1 << 2) #define BTRFS_INODE_READONLY (1U << 2)
#define BTRFS_INODE_NOCOMPRESS (1 << 3) #define BTRFS_INODE_NOCOMPRESS (1U << 3)
#define BTRFS_INODE_PREALLOC (1 << 4) #define BTRFS_INODE_PREALLOC (1U << 4)
#define BTRFS_INODE_SYNC (1 << 5) #define BTRFS_INODE_SYNC (1U << 5)
#define BTRFS_INODE_IMMUTABLE (1 << 6) #define BTRFS_INODE_IMMUTABLE (1U << 6)
#define BTRFS_INODE_APPEND (1 << 7) #define BTRFS_INODE_APPEND (1U << 7)
#define BTRFS_INODE_NODUMP (1 << 8) #define BTRFS_INODE_NODUMP (1U << 8)
#define BTRFS_INODE_NOATIME (1 << 9) #define BTRFS_INODE_NOATIME (1U << 9)
#define BTRFS_INODE_DIRSYNC (1 << 10) #define BTRFS_INODE_DIRSYNC (1U << 10)
#define BTRFS_INODE_COMPRESS (1 << 11) #define BTRFS_INODE_COMPRESS (1U << 11)
#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) #define BTRFS_INODE_ROOT_ITEM_INIT (1U << 31)
#define BTRFS_INODE_FLAG_MASK \ #define BTRFS_INODE_FLAG_MASK \
(BTRFS_INODE_NODATASUM | \ (BTRFS_INODE_NODATASUM | \
...@@ -1512,6 +1512,8 @@ do { \ ...@@ -1512,6 +1512,8 @@ do { \
BTRFS_INODE_COMPRESS | \ BTRFS_INODE_COMPRESS | \
BTRFS_INODE_ROOT_ITEM_INIT) BTRFS_INODE_ROOT_ITEM_INIT)
#define BTRFS_INODE_RO_FLAG_MASK (0)
struct btrfs_map_token { struct btrfs_map_token {
struct extent_buffer *eb; struct extent_buffer *eb;
char *kaddr; char *kaddr;
......
...@@ -1645,6 +1645,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, ...@@ -1645,6 +1645,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *inode_item, struct btrfs_inode_item *inode_item,
struct inode *inode) struct inode *inode)
{ {
u64 flags;
btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode)); btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode)); btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size); btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
...@@ -1657,7 +1659,9 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, ...@@ -1657,7 +1659,9 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
inode_peek_iversion(inode)); inode_peek_iversion(inode));
btrfs_set_stack_inode_transid(inode_item, trans->transid); btrfs_set_stack_inode_transid(inode_item, trans->transid);
btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
BTRFS_I(inode)->ro_flags);
btrfs_set_stack_inode_flags(inode_item, flags);
btrfs_set_stack_inode_block_group(inode_item, 0); btrfs_set_stack_inode_block_group(inode_item, 0);
btrfs_set_stack_timespec_sec(&inode_item->atime, btrfs_set_stack_timespec_sec(&inode_item->atime,
...@@ -1715,7 +1719,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) ...@@ -1715,7 +1719,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
btrfs_stack_inode_sequence(inode_item)); btrfs_stack_inode_sequence(inode_item));
inode->i_rdev = 0; inode->i_rdev = 0;
*rdev = btrfs_stack_inode_rdev(inode_item); *rdev = btrfs_stack_inode_rdev(inode_item);
BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); btrfs_inode_split_flags(btrfs_stack_inode_flags(inode_item),
&BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime); inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime); inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
......
...@@ -3764,7 +3764,8 @@ static int btrfs_read_locked_inode(struct inode *inode, ...@@ -3764,7 +3764,8 @@ static int btrfs_read_locked_inode(struct inode *inode,
rdev = btrfs_inode_rdev(leaf, inode_item); rdev = btrfs_inode_rdev(leaf, inode_item);
BTRFS_I(inode)->index_cnt = (u64)-1; BTRFS_I(inode)->index_cnt = (u64)-1;
BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item),
&BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
cache_index: cache_index:
/* /*
...@@ -3895,6 +3896,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, ...@@ -3895,6 +3896,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct inode *inode) struct inode *inode)
{ {
struct btrfs_map_token token; struct btrfs_map_token token;
u64 flags;
btrfs_init_map_token(&token, leaf); btrfs_init_map_token(&token, leaf);
...@@ -3930,7 +3932,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, ...@@ -3930,7 +3932,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode)); btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
btrfs_set_token_inode_transid(&token, item, trans->transid); btrfs_set_token_inode_transid(&token, item, trans->transid);
btrfs_set_token_inode_rdev(&token, item, inode->i_rdev); btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags); flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
BTRFS_I(inode)->ro_flags);
btrfs_set_token_inode_flags(&token, item, flags);
btrfs_set_token_inode_block_group(&token, item, 0); btrfs_set_token_inode_block_group(&token, item, 0);
} }
...@@ -9064,6 +9068,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ...@@ -9064,6 +9068,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->defrag_bytes = 0; ei->defrag_bytes = 0;
ei->disk_i_size = 0; ei->disk_i_size = 0;
ei->flags = 0; ei->flags = 0;
ei->ro_flags = 0;
ei->csum_bytes = 0; ei->csum_bytes = 0;
ei->index_cnt = (u64)-1; ei->index_cnt = (u64)-1;
ei->dir_index = 0; ei->dir_index = 0;
......
...@@ -103,9 +103,10 @@ static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode, ...@@ -103,9 +103,10 @@ static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
* Export internal inode flags to the format expected by the FS_IOC_GETFLAGS * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
* ioctl. * ioctl.
*/ */
static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags) static unsigned int btrfs_inode_flags_to_fsflags(struct btrfs_inode *binode)
{ {
unsigned int iflags = 0; unsigned int iflags = 0;
u32 flags = binode->flags;
if (flags & BTRFS_INODE_SYNC) if (flags & BTRFS_INODE_SYNC)
iflags |= FS_SYNC_FL; iflags |= FS_SYNC_FL;
...@@ -200,7 +201,7 @@ int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) ...@@ -200,7 +201,7 @@ int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{ {
struct btrfs_inode *binode = BTRFS_I(d_inode(dentry)); struct btrfs_inode *binode = BTRFS_I(d_inode(dentry));
fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(binode->flags)); fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(binode));
return 0; return 0;
} }
...@@ -224,7 +225,7 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns, ...@@ -224,7 +225,7 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
return -EOPNOTSUPP; return -EOPNOTSUPP;
fsflags = btrfs_mask_fsflags_for_type(inode, fa->flags); fsflags = btrfs_mask_fsflags_for_type(inode, fa->flags);
old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags); old_fsflags = btrfs_inode_flags_to_fsflags(binode);
ret = check_fsflags(old_fsflags, fsflags); ret = check_fsflags(old_fsflags, fsflags);
if (ret) if (ret)
return ret; return ret;
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "compression.h" #include "compression.h"
#include "volumes.h" #include "volumes.h"
#include "misc.h" #include "misc.h"
#include "btrfs_inode.h"
/* /*
* Error message should follow the following format: * Error message should follow the following format:
...@@ -1008,6 +1009,8 @@ static int check_inode_item(struct extent_buffer *leaf, ...@@ -1008,6 +1009,8 @@ static int check_inode_item(struct extent_buffer *leaf,
u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777); u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
u32 mode; u32 mode;
int ret; int ret;
u32 flags;
u32 ro_flags;
ret = check_inode_key(leaf, key, slot); ret = check_inode_key(leaf, key, slot);
if (unlikely(ret < 0)) if (unlikely(ret < 0))
...@@ -1063,11 +1066,17 @@ static int check_inode_item(struct extent_buffer *leaf, ...@@ -1063,11 +1066,17 @@ static int check_inode_item(struct extent_buffer *leaf,
btrfs_inode_nlink(leaf, iitem)); btrfs_inode_nlink(leaf, iitem));
return -EUCLEAN; return -EUCLEAN;
} }
if (unlikely(btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK)) { btrfs_inode_split_flags(btrfs_inode_flags(leaf, iitem), &flags, &ro_flags);
if (unlikely(flags & ~BTRFS_INODE_FLAG_MASK)) {
inode_item_err(leaf, slot, inode_item_err(leaf, slot,
"unknown flags detected: 0x%llx", "unknown incompat flags detected: 0x%x", flags);
btrfs_inode_flags(leaf, iitem) & return -EUCLEAN;
~BTRFS_INODE_FLAG_MASK); }
if (unlikely(!sb_rdonly(fs_info->sb) &&
(ro_flags & ~BTRFS_INODE_RO_FLAG_MASK))) {
inode_item_err(leaf, slot,
"unknown ro-compat flags detected on writeable mount: 0x%x",
ro_flags);
return -EUCLEAN; return -EUCLEAN;
} }
return 0; return 0;
......
...@@ -3924,6 +3924,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, ...@@ -3924,6 +3924,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
u64 logged_isize) u64 logged_isize)
{ {
struct btrfs_map_token token; struct btrfs_map_token token;
u64 flags;
btrfs_init_map_token(&token, leaf); btrfs_init_map_token(&token, leaf);
...@@ -3973,7 +3974,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, ...@@ -3973,7 +3974,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode)); btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
btrfs_set_token_inode_transid(&token, item, trans->transid); btrfs_set_token_inode_transid(&token, item, trans->transid);
btrfs_set_token_inode_rdev(&token, item, inode->i_rdev); btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags); flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
BTRFS_I(inode)->ro_flags);
btrfs_set_token_inode_flags(&token, item, flags);
btrfs_set_token_inode_block_group(&token, item, 0); btrfs_set_token_inode_block_group(&token, item, 0);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment