Commit aeced661 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus-4.12-ofs-1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux

Pull orangefs updates from Mike Marshall:
 "Orangefs cleanups, fixes and statx support.

  Some cleanups:

   - remove unused get_fsid_from_ino
   - fix bounds check for listxattr
   - clean up oversize xattr validation
   - do not set getattr_time on orangefs_lookup
   - return from orangefs_devreq_read quickly if possible
   - do not wait for timeout if umounting
   - handle zero size write in debugfs

  Bug fixes:

   - do not check possibly stale size on truncate
   - ensure the userspace component is unmounted if mount fails
   - total reimplementation of dir.c

  New feature:

   - implement statx

  The new implementation of dir.c is kind of a big deal, all new code.
  It has been posted to fs-devel during the previous rc period, we
  didn't get much review or feedback from there, but it has been
  reviewed very heavily here, so much so that we have two entire
  versions of the reimplementation.

  Not only does the new implementation fix some xfstests, but it passes
  all the new tests we made here that involve seeking and rewinding and
  giant directories and long file names. The new dir code has three
  patches itself:

   - skip forward to the next directory entry if seek is short
   - invalidate stored directory on seek
   - count directory pieces correctly"

* tag 'for-linus-4.12-ofs-1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux:
  orangefs: count directory pieces correctly
  orangefs: invalidate stored directory on seek
  orangefs: skip forward to the next directory entry if seek is short
  orangefs: handle zero size write in debugfs
  orangefs: do not wait for timeout if umounting
  orangefs: return from orangefs_devreq_read quickly if possible
  orangefs: ensure the userspace component is unmounted if mount fails
  orangefs: do not check possibly stale size on truncate
  orangefs: implement statx
  orangefs: remove ORANGEFS_READDIR macros
  orangefs: support very large directories
  orangefs: support llseek on directories
  orangefs: rewrite readdir to fix several bugs
  orangefs: do not set getattr_time on orangefs_lookup
  orangefs: clean up oversize xattr validation
  orangefs: fix bounds check for listxattr
  orangefs: remove unused get_fsid_from_ino
parents 414975eb 2f713b5c
......@@ -180,6 +180,10 @@ static ssize_t orangefs_devreq_read(struct file *file,
return -EINVAL;
}
/* Check for an empty list before locking. */
if (list_empty(&orangefs_request_list))
return -EAGAIN;
restart:
/* Get next op (if any) from top of list. */
spin_lock(&orangefs_request_list_lock);
......
This diff is collapsed.
......@@ -40,16 +40,6 @@ struct orangefs_mkdir_response {
struct orangefs_object_kref refn;
};
/*
* duplication of some system interface structures so that I don't have
* to allocate extra memory
*/
struct orangefs_dirent {
char *d_name;
int d_length;
struct orangefs_khandle khandle;
};
struct orangefs_statfs_response {
__s64 block_size;
__s64 blocks_total;
......@@ -131,12 +121,16 @@ struct orangefs_downcall_s {
} resp;
};
/*
* The readdir response comes in the trailer. It is followed by the
* directory entries as described in dir.c.
*/
struct orangefs_readdir_response_s {
__u64 token;
__u64 directory_version;
__u32 __pad2;
__u32 orangefs_dirent_outcount;
struct orangefs_dirent *dirent_array;
};
#endif /* __DOWNCALL_H */
......@@ -474,7 +474,8 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite
/* Make sure generic_write_checks sees an up to date inode size. */
if (file->f_flags & O_APPEND) {
rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1);
rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1,
STATX_SIZE);
if (rc == -ESTALE)
rc = -EIO;
if (rc) {
......@@ -692,7 +693,8 @@ static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin)
* NOTE: We are only interested in file size here,
* so we set mask accordingly.
*/
ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1);
ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1,
STATX_SIZE);
if (ret == -ESTALE)
ret = -EIO;
if (ret) {
......
......@@ -161,7 +161,7 @@ static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr)
iattr->ia_size);
/* Ensure that we have a up to date size, so we know if it changed. */
ret = orangefs_inode_getattr(inode, 0, 1);
ret = orangefs_inode_getattr(inode, 0, 1, STATX_SIZE);
if (ret == -ESTALE)
ret = -EIO;
if (ret) {
......@@ -218,8 +218,7 @@ int orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
if (ret)
goto out;
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(inode)) {
if (iattr->ia_valid & ATTR_SIZE) {
ret = orangefs_setattr_size(inode, iattr);
if (ret)
goto out;
......@@ -256,13 +255,19 @@ int orangefs_getattr(const struct path *path, struct kstat *stat,
"orangefs_getattr: called on %pd\n",
path->dentry);
ret = orangefs_inode_getattr(inode, 0, 0);
ret = orangefs_inode_getattr(inode, 0, 0, request_mask);
if (ret == 0) {
generic_fillattr(inode, stat);
/* override block size reported to stat */
orangefs_inode = ORANGEFS_I(inode);
stat->blksize = orangefs_inode->blksize;
if (request_mask & STATX_SIZE)
stat->result_mask = STATX_BASIC_STATS;
else
stat->result_mask = STATX_BASIC_STATS &
~STATX_SIZE;
}
return ret;
}
......@@ -277,7 +282,7 @@ int orangefs_permission(struct inode *inode, int mask)
gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__);
/* Make sure the permission (and other common attrs) are up to date. */
ret = orangefs_inode_getattr(inode, 0, 0);
ret = orangefs_inode_getattr(inode, 0, 0, STATX_MODE);
if (ret < 0)
return ret;
......@@ -375,7 +380,7 @@ struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref
if (!inode || !(inode->i_state & I_NEW))
return inode;
error = orangefs_inode_getattr(inode, 1, 1);
error = orangefs_inode_getattr(inode, 1, 1, STATX_ALL);
if (error) {
iget_failed(inode);
return ERR_PTR(error);
......@@ -420,7 +425,7 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir,
orangefs_set_inode(inode, ref);
inode->i_ino = hash; /* needed for stat etc */
error = orangefs_inode_getattr(inode, 1, 1);
error = orangefs_inode_getattr(inode, 1, 1, STATX_ALL);
if (error)
goto out_iput;
......
......@@ -74,6 +74,7 @@ static int orangefs_create(struct inode *dir,
unlock_new_inode(inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
gossip_debug(GOSSIP_NAME_DEBUG,
"%s: dentry instantiated for %pd\n",
......@@ -193,8 +194,6 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
gossip_debug(GOSSIP_NAME_DEBUG,
"%s:%s:%d "
"Found good inode [%lu] with count [%d]\n",
......@@ -324,6 +323,7 @@ static int orangefs_symlink(struct inode *dir,
unlock_new_inode(inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
gossip_debug(GOSSIP_NAME_DEBUG,
"Inode (Symlink) %pU -> %pd\n",
......@@ -388,6 +388,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
unlock_new_inode(inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
gossip_debug(GOSSIP_NAME_DEBUG,
"Inode (Directory) %pU -> %pd\n",
......
......@@ -440,6 +440,9 @@ static ssize_t orangefs_debug_write(struct file *file,
"orangefs_debug_write: %pD\n",
file);
if (count == 0)
return 0;
/*
* Thwart users who try to jamb a ridiculous number
* of bytes into the debug file...
......
......@@ -52,12 +52,7 @@
*/
#define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000800
/*
* The maximum number of directory entries in a single request is 96.
* XXX: Why can this not be higher. The client-side code can handle up to 512.
* XXX: What happens if we expect more than the client can return?
*/
#define ORANGEFS_MAX_DIRENT_COUNT_READDIR 96
#define ORANGEFS_MAX_DIRENT_COUNT_READDIR 512
#include "upcall.h"
#include "downcall.h"
......
......@@ -215,6 +215,7 @@ struct orangefs_inode_s {
unsigned long pinode_flags;
unsigned long getattr_time;
u32 getattr_mask;
};
#define P_ATIME_FLAG 0
......@@ -340,11 +341,6 @@ static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode)
return &(ORANGEFS_I(inode)->refn.khandle);
}
static inline __s32 get_fsid_from_ino(struct inode *inode)
{
return ORANGEFS_I(inode)->refn.fs_id;
}
static inline ino_t get_ino_from_khandle(struct inode *inode)
{
struct orangefs_khandle *khandle;
......@@ -500,7 +496,8 @@ int orangefs_inode_setxattr(struct inode *inode,
size_t size,
int flags);
int orangefs_inode_getattr(struct inode *inode, int new, int bypass);
int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
u32 request_mask);
int orangefs_inode_check_changed(struct inode *inode);
......
......@@ -251,7 +251,8 @@ static int orangefs_inode_is_stale(struct inode *inode, int new,
return 0;
}
int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
u32 request_mask)
{
struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
struct orangefs_kernel_op_s *new_op;
......@@ -262,7 +263,13 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
get_khandle_from_ino(inode));
if (!new && !bypass) {
if (time_before(jiffies, orangefs_inode->getattr_time))
/*
* Must have all the attributes in the mask and be within cache
* time.
*/
if ((request_mask & orangefs_inode->getattr_mask) ==
request_mask &&
time_before(jiffies, orangefs_inode->getattr_time))
return 0;
}
......@@ -270,7 +277,15 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
if (!new_op)
return -ENOMEM;
new_op->upcall.req.getattr.refn = orangefs_inode->refn;
new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
/*
* Size is the hardest attribute to get. The incremental cost of any
* other attribute is essentially zero.
*/
if (request_mask & STATX_SIZE || new)
new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
else
new_op->upcall.req.getattr.mask =
ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
ret = service_operation(new_op, __func__,
get_interruptible_flag(inode));
......@@ -291,25 +306,29 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
case S_IFREG:
inode->i_flags = orangefs_inode_flags(&new_op->
downcall.resp.getattr.attributes);
inode_size = (loff_t)new_op->
downcall.resp.getattr.attributes.size;
rounded_up_size =
(inode_size + (4096 - (inode_size % 4096)));
inode->i_size = inode_size;
orangefs_inode->blksize =
new_op->downcall.resp.getattr.attributes.blksize;
spin_lock(&inode->i_lock);
inode->i_bytes = inode_size;
inode->i_blocks =
(unsigned long)(rounded_up_size / 512);
spin_unlock(&inode->i_lock);
if (request_mask & STATX_SIZE || new) {
inode_size = (loff_t)new_op->
downcall.resp.getattr.attributes.size;
rounded_up_size =
(inode_size + (4096 - (inode_size % 4096)));
inode->i_size = inode_size;
orangefs_inode->blksize =
new_op->downcall.resp.getattr.attributes.blksize;
spin_lock(&inode->i_lock);
inode->i_bytes = inode_size;
inode->i_blocks =
(unsigned long)(rounded_up_size / 512);
spin_unlock(&inode->i_lock);
}
break;
case S_IFDIR:
inode->i_size = PAGE_SIZE;
orangefs_inode->blksize = i_blocksize(inode);
spin_lock(&inode->i_lock);
inode_set_bytes(inode, inode->i_size);
spin_unlock(&inode->i_lock);
if (request_mask & STATX_SIZE || new) {
inode->i_size = PAGE_SIZE;
orangefs_inode->blksize = i_blocksize(inode);
spin_lock(&inode->i_lock);
inode_set_bytes(inode, inode->i_size);
spin_unlock(&inode->i_lock);
}
set_nlink(inode, 1);
break;
case S_IFLNK:
......@@ -349,6 +368,10 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
orangefs_inode->getattr_time = jiffies +
orangefs_getattr_timeout_msecs*HZ/1000;
if (request_mask & STATX_SIZE || new)
orangefs_inode->getattr_mask = STATX_BASIC_STATS;
else
orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE;
ret = 0;
out:
op_release(new_op);
......@@ -500,41 +523,6 @@ int orangefs_flush_inode(struct inode *inode)
return ret;
}
int orangefs_unmount_sb(struct super_block *sb)
{
int ret = -EINVAL;
struct orangefs_kernel_op_s *new_op = NULL;
gossip_debug(GOSSIP_UTILS_DEBUG,
"orangefs_unmount_sb called on sb %p\n",
sb);
new_op = op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT);
if (!new_op)
return -ENOMEM;
new_op->upcall.req.fs_umount.id = ORANGEFS_SB(sb)->id;
new_op->upcall.req.fs_umount.fs_id = ORANGEFS_SB(sb)->fs_id;
strncpy(new_op->upcall.req.fs_umount.orangefs_config_server,
ORANGEFS_SB(sb)->devname,
ORANGEFS_MAX_SERVER_ADDR_LEN);
gossip_debug(GOSSIP_UTILS_DEBUG,
"Attempting ORANGEFS Unmount via host %s\n",
new_op->upcall.req.fs_umount.orangefs_config_server);
ret = service_operation(new_op, "orangefs_fs_umount", 0);
gossip_debug(GOSSIP_UTILS_DEBUG,
"orangefs_unmount: got return value of %d\n", ret);
if (ret)
sb = ERR_PTR(ret);
else
ORANGEFS_SB(sb)->mount_pending = 1;
op_release(new_op);
return ret;
}
void orangefs_make_bad_inode(struct inode *inode)
{
if (is_root_handle(inode)) {
......
......@@ -138,13 +138,8 @@ typedef __s64 ORANGEFS_offset;
#define ORANGEFS_G_SGID (1 << 10)
#define ORANGEFS_U_SUID (1 << 11)
/* definition taken from stdint.h */
#define INT32_MAX (2147483647)
#define ORANGEFS_ITERATE_START (INT32_MAX - 1)
#define ORANGEFS_ITERATE_END (INT32_MAX - 2)
#define ORANGEFS_ITERATE_NEXT (INT32_MAX - 3)
#define ORANGEFS_READDIR_START ORANGEFS_ITERATE_START
#define ORANGEFS_READDIR_END ORANGEFS_ITERATE_END
#define ORANGEFS_ITERATE_START 2147483646
#define ORANGEFS_ITERATE_END 2147483645
#define ORANGEFS_IMMUTABLE_FL FS_IMMUTABLE_FL
#define ORANGEFS_APPEND_FL FS_APPEND_FL
#define ORANGEFS_NOATIME_FL FS_NOATIME_FL
......
......@@ -376,6 +376,25 @@ static const struct export_operations orangefs_export_ops = {
.fh_to_dentry = orangefs_fh_to_dentry,
};
static int orangefs_unmount(int id, __s32 fs_id, const char *devname)
{
struct orangefs_kernel_op_s *op;
int r;
op = op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT);
if (!op)
return -ENOMEM;
op->upcall.req.fs_umount.id = id;
op->upcall.req.fs_umount.fs_id = fs_id;
strncpy(op->upcall.req.fs_umount.orangefs_config_server,
devname, ORANGEFS_MAX_SERVER_ADDR_LEN);
r = service_operation(op, "orangefs_fs_umount", 0);
/* Not much to do about an error here. */
if (r)
gossip_err("orangefs_unmount: service_operation %d\n", r);
op_release(op);
return r;
}
static int orangefs_fill_sb(struct super_block *sb,
struct orangefs_fs_mount_response *fs_mount,
void *data, int silent)
......@@ -484,6 +503,8 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
if (IS_ERR(sb)) {
d = ERR_CAST(sb);
orangefs_unmount(new_op->downcall.resp.fs_mount.id,
new_op->downcall.resp.fs_mount.fs_id, devname);
goto free_op;
}
......@@ -539,6 +560,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
free_sb_and_op:
/* Will call orangefs_kill_sb with sb not in list. */
ORANGEFS_SB(sb)->no_list = 1;
/* ORANGEFS_VFS_OP_FS_UMOUNT is done by orangefs_kill_sb. */
deactivate_locked_super(sb);
free_op:
gossip_err("orangefs_mount: mount request failed with %d\n", ret);
......@@ -554,6 +576,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
void orangefs_kill_sb(struct super_block *sb)
{
int r;
gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_kill_sb: called\n");
/* provided sb cleanup */
......@@ -563,7 +586,10 @@ void orangefs_kill_sb(struct super_block *sb)
* issue the unmount to userspace to tell it to remove the
* dynamic mount info it has for this superblock
*/
orangefs_unmount_sb(sb);
r = orangefs_unmount(ORANGEFS_SB(sb)->id, ORANGEFS_SB(sb)->fs_id,
ORANGEFS_SB(sb)->devname);
if (!r)
ORANGEFS_SB(sb)->mount_pending = 1;
if (!ORANGEFS_SB(sb)->no_list) {
/* remove the sb from our list of orangefs specific sb's */
......
......@@ -124,7 +124,14 @@ int service_operation(struct orangefs_kernel_op_s *op,
gossip_debug(GOSSIP_WAIT_DEBUG,
"%s:client core is NOT in service.\n",
__func__);
timeout = op_timeout_secs * HZ;
/*
* Don't wait for the userspace component to return if
* the filesystem is being umounted anyway.
*/
if (op->upcall.type == ORANGEFS_VFS_OP_FS_UMOUNT)
timeout = 0;
else
timeout = op_timeout_secs * HZ;
}
spin_unlock(&orangefs_request_list_lock);
......
......@@ -76,11 +76,8 @@ ssize_t orangefs_inode_getxattr(struct inode *inode, const char *name,
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) {
gossip_err("Invalid key length (%d)\n",
(int)strlen(name));
if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
return -EINVAL;
}
fsuid = from_kuid(&init_user_ns, current_fsuid());
fsgid = from_kgid(&init_user_ns, current_fsgid());
......@@ -172,6 +169,9 @@ static int orangefs_inode_removexattr(struct inode *inode, const char *name,
struct orangefs_kernel_op_s *new_op = NULL;
int ret = -ENOMEM;
if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
return -EINVAL;
down_write(&orangefs_inode->xattr_sem);
new_op = op_alloc(ORANGEFS_VFS_OP_REMOVEXATTR);
if (!new_op)
......@@ -231,23 +231,13 @@ int orangefs_inode_setxattr(struct inode *inode, const char *name,
"%s: name %s, buffer_size %zd\n",
__func__, name, size);
if (size >= ORANGEFS_MAX_XATTR_VALUELEN ||
flags < 0) {
gossip_err("orangefs_inode_setxattr: bogus values of size(%d), flags(%d)\n",
(int)size,
flags);
if (size > ORANGEFS_MAX_XATTR_VALUELEN)
return -EINVAL;
if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
return -EINVAL;
}
internal_flag = convert_to_internal_xattr_flags(flags);
if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) {
gossip_err
("orangefs_inode_setxattr: bogus key size (%d)\n",
(int)(strlen(name)));
return -EINVAL;
}
/* This is equivalent to a removexattr */
if (size == 0 && value == NULL) {
gossip_debug(GOSSIP_XATTR_DEBUG,
......@@ -358,7 +348,7 @@ ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size)
returned_count = new_op->downcall.resp.listxattr.returned_count;
if (returned_count < 0 ||
returned_count >= ORANGEFS_MAX_XATTR_LISTLEN) {
returned_count > ORANGEFS_MAX_XATTR_LISTLEN) {
gossip_err("%s: impossible value for returned_count:%d:\n",
__func__,
returned_count);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment