Commit aeced661 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus-4.12-ofs-1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux

Pull orangefs updates from Mike Marshall:
 "Orangefs cleanups, fixes and statx support.

  Some cleanups:

   - remove unused get_fsid_from_ino
   - fix bounds check for listxattr
   - clean up oversize xattr validation
   - do not set getattr_time on orangefs_lookup
   - return from orangefs_devreq_read quickly if possible
   - do not wait for timeout if umounting
   - handle zero size write in debugfs

  Bug fixes:

   - do not check possibly stale size on truncate
   - ensure the userspace component is unmounted if mount fails
   - total reimplementation of dir.c

  New feature:

   - implement statx

  The new implementation of dir.c is kind of a big deal, all new code.
  It has been posted to fs-devel during the previous rc period, we
  didn't get much review or feedback from there, but it has been
  reviewed very heavily here, so much so that we have two entire
  versions of the reimplementation.

  Not only does the new implementation fix some xfstests, but it passes
  all the new tests we made here that involve seeking and rewinding and
  giant directories and long file names. The new dir code has three
  patches itself:

   - skip forward to the next directory entry if seek is short
   - invalidate stored directory on seek
   - count directory pieces correctly"

* tag 'for-linus-4.12-ofs-1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux:
  orangefs: count directory pieces correctly
  orangefs: invalidate stored directory on seek
  orangefs: skip forward to the next directory entry if seek is short
  orangefs: handle zero size write in debugfs
  orangefs: do not wait for timeout if umounting
  orangefs: return from orangefs_devreq_read quickly if possible
  orangefs: ensure the userspace component is unmounted if mount fails
  orangefs: do not check possibly stale size on truncate
  orangefs: implement statx
  orangefs: remove ORANGEFS_READDIR macros
  orangefs: support very large directories
  orangefs: support llseek on directories
  orangefs: rewrite readdir to fix several bugs
  orangefs: do not set getattr_time on orangefs_lookup
  orangefs: clean up oversize xattr validation
  orangefs: fix bounds check for listxattr
  orangefs: remove unused get_fsid_from_ino
parents 414975eb 2f713b5c
......@@ -180,6 +180,10 @@ static ssize_t orangefs_devreq_read(struct file *file,
return -EINVAL;
}
/* Check for an empty list before locking. */
if (list_empty(&orangefs_request_list))
return -EAGAIN;
restart:
/* Get next op (if any) from top of list. */
spin_lock(&orangefs_request_list_lock);
......
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
* Copyright 2017 Omnibond Systems, L.L.C.
*/
#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
struct orangefs_dir_part {
struct orangefs_dir_part *next;
size_t len;
};
struct orangefs_dir {
__u64 token;
struct orangefs_dir_part *part;
loff_t end;
int error;
};
#define PART_SHIFT (24)
#define PART_SIZE (1<<24)
#define PART_MASK (~(PART_SIZE - 1))
/*
* decode routine used by kmod to deal with the blob sent from
* userspace for readdirs. The blob contains zero or more of these
* sub-blobs:
* __u32 - represents length of the character string that follows.
* string - between 1 and ORANGEFS_NAME_MAX bytes long.
* padding - (if needed) to cause the __u32 plus the string to be
* eight byte aligned.
* khandle - sizeof(khandle) bytes.
* There can be up to 512 directory entries. Each entry is encoded as
* follows:
* 4 bytes: string size (n)
* n bytes: string
* 1 byte: trailing zero
* padding to 8 bytes
* 16 bytes: khandle
* padding to 8 bytes
*
* The trailer_buf starts with a struct orangefs_readdir_response_s
* which must be skipped to get to the directory data.
*
* The data which is received from the userspace daemon is termed a
* part and is stored in a linked list in case more than one part is
* needed for a large directory.
*
* The position pointer (ctx->pos) encodes the part and offset on which
* to begin reading at. Bits above PART_SHIFT encode the part and bits
* below PART_SHIFT encode the offset. Parts are stored in a linked
* list which grows as data is received from the server. The overhead
* associated with managing the list is presumed to be small compared to
* the overhead of communicating with the server.
*
* As data is received from the server, it is placed at the end of the
* part list. Data is parsed from the current position as it is needed.
* When data is determined to be corrupt, it is either because the
* userspace component has sent back corrupt data or because the file
* pointer has been moved to an invalid location. Since the two cannot
* be differentiated, return EIO.
*
* Part zero is synthesized to contains `.' and `..'. Part one is the
* first part of the part list.
*/
static long decode_dirents(char *ptr, size_t size,
struct orangefs_readdir_response_s *readdir)
static int do_readdir(struct orangefs_inode_s *oi,
struct orangefs_dir *od, struct dentry *dentry,
struct orangefs_kernel_op_s *op)
{
int i;
struct orangefs_readdir_response_s *rd =
(struct orangefs_readdir_response_s *) ptr;
char *buf = ptr;
int khandle_size = sizeof(struct orangefs_khandle);
size_t offset = offsetof(struct orangefs_readdir_response_s,
dirent_array);
/* 8 reflects eight byte alignment */
int smallest_blob = khandle_size + 8;
__u32 len;
int aligned_len;
int sizeof_u32 = sizeof(__u32);
long ret;
gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);
/* size is = offset on empty dirs, > offset on non-empty dirs... */
if (size < offset) {
gossip_err("%s: size:%zu: offset:%zu:\n",
__func__,
size,
offset);
ret = -EINVAL;
goto out;
}
struct orangefs_readdir_response_s *resp;
int bufi, r;
if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
__func__,
size,
readdir->orangefs_dirent_outcount);
ret = -EINVAL;
goto out;
}
/*
* Despite the badly named field, readdir does not use shared
* memory. However, there are a limited number of readdir
* slots, which must be allocated here. This flag simply tells
* the op scheduler to return the op here for retry.
*/
op->uses_shared_memory = 1;
op->upcall.req.readdir.refn = oi->refn;
op->upcall.req.readdir.token = od->token;
op->upcall.req.readdir.max_dirent_count =
ORANGEFS_MAX_DIRENT_COUNT_READDIR;
readdir->token = rd->token;
readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
sizeof(*readdir->dirent_array),
GFP_KERNEL);
if (readdir->dirent_array == NULL) {
gossip_err("%s: kcalloc failed.\n", __func__);
ret = -ENOMEM;
goto out;
again:
bufi = orangefs_readdir_index_get();
if (bufi < 0) {
od->error = bufi;
return bufi;
}
buf += offset;
size -= offset;
for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
if (size < smallest_blob) {
gossip_err("%s: size:%zu: smallest_blob:%d:\n",
__func__,
size,
smallest_blob);
ret = -EINVAL;
goto free;
}
op->upcall.req.readdir.buf_index = bufi;
len = *(__u32 *)buf;
if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
gossip_err("%s: len:%d:\n", __func__, len);
ret = -EINVAL;
goto free;
}
r = service_operation(op, "orangefs_readdir",
get_interruptible_flag(dentry->d_inode));
gossip_debug(GOSSIP_DIR_DEBUG,
"%s: size:%zu: len:%d:\n",
__func__,
size,
len);
orangefs_readdir_index_put(bufi);
readdir->dirent_array[i].d_name = buf + sizeof_u32;
readdir->dirent_array[i].d_length = len;
if (op_state_purged(op)) {
if (r == -EAGAIN) {
vfree(op->downcall.trailer_buf);
goto again;
} else if (r == -EIO) {
vfree(op->downcall.trailer_buf);
od->error = r;
return r;
}
}
/*
* Calculate "aligned" length of this string and its
* associated __u32 descriptor.
*/
aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
gossip_debug(GOSSIP_DIR_DEBUG,
"%s: aligned_len:%d:\n",
__func__,
aligned_len);
if (r < 0) {
vfree(op->downcall.trailer_buf);
od->error = r;
return r;
} else if (op->downcall.status) {
vfree(op->downcall.trailer_buf);
od->error = op->downcall.status;
return op->downcall.status;
}
/*
* The end of the blob should coincide with the end
* of the last sub-blob.
* The maximum size is size per entry times the 512 entries plus
* the header. This is well under the limit.
*/
if (size < aligned_len + khandle_size) {
gossip_err("%s: ran off the end of the blob.\n",
__func__);
ret = -EINVAL;
goto free;
if (op->downcall.trailer_size > PART_SIZE) {
vfree(op->downcall.trailer_buf);
od->error = -EIO;
return -EIO;
}
size -= aligned_len + khandle_size;
buf += aligned_len;
resp = (struct orangefs_readdir_response_s *)
op->downcall.trailer_buf;
od->token = resp->token;
return 0;
}
readdir->dirent_array[i].khandle =
*(struct orangefs_khandle *) buf;
buf += khandle_size;
static int parse_readdir(struct orangefs_dir *od,
struct orangefs_kernel_op_s *op)
{
struct orangefs_dir_part *part, *new;
size_t count;
count = 1;
part = od->part;
while (part) {
count++;
if (part->next)
part = part->next;
else
break;
}
ret = buf - ptr;
gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
goto out;
free:
kfree(readdir->dirent_array);
readdir->dirent_array = NULL;
new = (void *)op->downcall.trailer_buf;
new->next = NULL;
new->len = op->downcall.trailer_size -
sizeof(struct orangefs_readdir_response_s);
if (!od->part)
od->part = new;
else
part->next = new;
count++;
od->end = count << PART_SHIFT;
out:
return ret;
return 0;
}
/*
* Read directory entries from an instance of an open directory.
*/
static int orangefs_readdir(struct file *file, struct dir_context *ctx)
static int orangefs_dir_more(struct orangefs_inode_s *oi,
struct orangefs_dir *od, struct dentry *dentry)
{
int ret = 0;
int buffer_index;
/*
* ptoken supports Orangefs' distributed directory logic, added
* in 2.9.2.
*/
__u64 *ptoken = file->private_data;
__u64 pos = 0;
ino_t ino = 0;
struct dentry *dentry = file->f_path.dentry;
struct orangefs_kernel_op_s *new_op = NULL;
struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
struct orangefs_readdir_response_s readdir_response;
void *dents_buf;
int i = 0;
int len = 0;
ino_t current_ino = 0;
char *current_entry = NULL;
long bytes_decoded;
gossip_debug(GOSSIP_DIR_DEBUG,
"%s: ctx->pos:%lld, ptoken = %llu\n",
__func__,
lld(ctx->pos),
llu(*ptoken));
pos = (__u64) ctx->pos;
/* are we done? */
if (pos == ORANGEFS_READDIR_END) {
gossip_debug(GOSSIP_DIR_DEBUG,
"Skipping to termination path\n");
return 0;
struct orangefs_kernel_op_s *op;
int r;
op = op_alloc(ORANGEFS_VFS_OP_READDIR);
if (!op) {
od->error = -ENOMEM;
return -ENOMEM;
}
r = do_readdir(oi, od, dentry, op);
if (r) {
od->error = r;
goto out;
}
r = parse_readdir(od, op);
if (r) {
od->error = r;
goto out;
}
gossip_debug(GOSSIP_DIR_DEBUG,
"orangefs_readdir called on %pd (pos=%llu)\n",
dentry, llu(pos));
od->error = 0;
out:
op_release(op);
return od->error;
}
memset(&readdir_response, 0, sizeof(readdir_response));
static int fill_from_part(struct orangefs_dir_part *part,
struct dir_context *ctx)
{
const int offset = sizeof(struct orangefs_readdir_response_s);
struct orangefs_khandle *khandle;
__u32 *len, padlen;
loff_t i;
char *s;
i = ctx->pos & ~PART_MASK;
new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
if (!new_op)
return -ENOMEM;
/* The file offset from userspace is too large. */
if (i > part->len)
return 1;
/*
* Only the indices are shared. No memory is actually shared, but the
* mechanism is used.
* If the seek pointer is positioned just before an entry it
* should find the next entry.
*/
new_op->uses_shared_memory = 1;
new_op->upcall.req.readdir.refn = orangefs_inode->refn;
new_op->upcall.req.readdir.max_dirent_count =
ORANGEFS_MAX_DIRENT_COUNT_READDIR;
gossip_debug(GOSSIP_DIR_DEBUG,
"%s: upcall.req.readdir.refn.khandle: %pU\n",
__func__,
&new_op->upcall.req.readdir.refn.khandle);
new_op->upcall.req.readdir.token = *ptoken;
if (i % 8)
i = i + (8 - i%8)%8;
get_new_buffer_index:
buffer_index = orangefs_readdir_index_get();
if (buffer_index < 0) {
ret = buffer_index;
gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
ret);
goto out_free_op;
while (i < part->len) {
if (part->len < i + sizeof *len)
break;
len = (void *)part + offset + i;
/*
* len is the size of the string itself. padlen is the
* total size of the encoded string.
*/
padlen = (sizeof *len + *len + 1) +
(8 - (sizeof *len + *len + 1)%8)%8;
if (part->len < i + padlen + sizeof *khandle)
goto next;
s = (void *)part + offset + i + sizeof *len;
if (s[*len] != 0)
goto next;
khandle = (void *)part + offset + i + padlen;
if (!dir_emit(ctx, s, *len,
orangefs_khandle_to_ino(khandle),
DT_UNKNOWN))
return 0;
i += padlen + sizeof *khandle;
i = i + (8 - i%8)%8;
BUG_ON(i > part->len);
ctx->pos = (ctx->pos & PART_MASK) | i;
continue;
next:
i += 8;
}
new_op->upcall.req.readdir.buf_index = buffer_index;
ret = service_operation(new_op,
"orangefs_readdir",
get_interruptible_flag(dentry->d_inode));
gossip_debug(GOSSIP_DIR_DEBUG,
"Readdir downcall status is %d. ret:%d\n",
new_op->downcall.status,
ret);
orangefs_readdir_index_put(buffer_index);
return 1;
}
if (ret == -EAGAIN && op_state_purged(new_op)) {
/* Client-core indices are invalid after it restarted. */
gossip_debug(GOSSIP_DIR_DEBUG,
"%s: Getting new buffer_index for retry of readdir..\n",
__func__);
goto get_new_buffer_index;
}
static int orangefs_dir_fill(struct orangefs_inode_s *oi,
struct orangefs_dir *od, struct dentry *dentry,
struct dir_context *ctx)
{
struct orangefs_dir_part *part;
size_t count;
if (ret == -EIO && op_state_purged(new_op)) {
gossip_err("%s: Client is down. Aborting readdir call.\n",
__func__);
goto out_free_op;
}
count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
if (ret < 0 || new_op->downcall.status != 0) {
gossip_debug(GOSSIP_DIR_DEBUG,
"Readdir request failed. Status:%d\n",
new_op->downcall.status);
if (ret >= 0)
ret = new_op->downcall.status;
goto out_free_op;
part = od->part;
while (part->next && count) {
count--;
part = part->next;
}
dents_buf = new_op->downcall.trailer_buf;
if (dents_buf == NULL) {
gossip_err("Invalid NULL buffer in readdir response\n");
ret = -ENOMEM;
goto out_free_op;
/* This means the userspace file offset is invalid. */
if (count) {
od->error = -EIO;
return -EIO;
}
bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size,
&readdir_response);
if (bytes_decoded < 0) {
ret = bytes_decoded;
gossip_err("Could not decode readdir from buffer %d\n", ret);
goto out_vfree;
while (part && part->len) {
int r;
r = fill_from_part(part, ctx);
if (r < 0) {
od->error = r;
return r;
} else if (r == 0) {
/* Userspace buffer is full. */
break;
} else {
/*
* The part ran out of data. Move to the next
* part. */
ctx->pos = (ctx->pos & PART_MASK) +
(1 << PART_SHIFT);
part = part->next;
}
if (bytes_decoded != new_op->downcall.trailer_size) {
gossip_err("orangefs_readdir: # bytes decoded (%ld) "
"!= trailer size (%ld)\n",
bytes_decoded,
(long)new_op->downcall.trailer_size);
ret = -EINVAL;
goto out_destroy_handle;
}
return 0;
}
static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
int whence)
{
struct orangefs_dir *od = file->private_data;
/*
* orangefs doesn't actually store dot and dot-dot, but
* we need to have them represented.
* Delete the stored data so userspace sees new directory
* entries.
*/
if (pos == 0) {
ino = get_ino_from_khandle(dentry->d_inode);
gossip_debug(GOSSIP_DIR_DEBUG,
"%s: calling dir_emit of \".\" with pos = %llu\n",
__func__,
llu(pos));
ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
pos += 1;
if (!whence && offset < od->end) {
struct orangefs_dir_part *part = od->part;
while (part) {
struct orangefs_dir_part *next = part->next;
vfree(part);
part = next;
}
od->token = ORANGEFS_ITERATE_START;
od->part = NULL;
od->end = 1 << PART_SHIFT;
}
return default_llseek(file, offset, whence);
}
static int orangefs_dir_iterate(struct file *file,
struct dir_context *ctx)
{
struct orangefs_inode_s *oi;
struct orangefs_dir *od;
struct dentry *dentry;
int r;
dentry = file->f_path.dentry;
oi = ORANGEFS_I(dentry->d_inode);
od = file->private_data;
if (pos == 1) {
ino = get_parent_ino_from_dentry(dentry);
gossip_debug(GOSSIP_DIR_DEBUG,
"%s: calling dir_emit of \"..\" with pos = %llu\n",
__func__,
llu(pos));
ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
pos += 1;
if (od->error)
return od->error;
if (ctx->pos == 0) {
if (!dir_emit_dot(file, ctx))
return 0;
ctx->pos++;
}
if (ctx->pos == 1) {
if (!dir_emit_dotdot(file, ctx))
return 0;
ctx->pos = 1 << PART_SHIFT;
}
/*
* we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
* to prevent "finding" dot and dot-dot on any iteration
* other than the first.
* The seek position is in the first synthesized part but is not
* valid.
*/
if (ctx->pos == ORANGEFS_ITERATE_NEXT)
ctx->pos = 0;
gossip_debug(GOSSIP_DIR_DEBUG,
"%s: dirent_outcount:%d:\n",
__func__,
readdir_response.orangefs_dirent_outcount);
for (i = ctx->pos;
i < readdir_response.orangefs_dirent_outcount;
i++) {
len = readdir_response.dirent_array[i].d_length;
current_entry = readdir_response.dirent_array[i].d_name;
current_ino = orangefs_khandle_to_ino(
&readdir_response.dirent_array[i].khandle);
gossip_debug(GOSSIP_DIR_DEBUG,
"calling dir_emit for %s with len %d"
", ctx->pos %ld\n",
current_entry,
len,
(unsigned long)ctx->pos);
/*
* type is unknown. We don't return object type
* in the dirent_array. This leaves getdents
* clueless about type.
*/
ret =
dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
if (!ret)
break;
ctx->pos++;
gossip_debug(GOSSIP_DIR_DEBUG,
"%s: ctx->pos:%lld\n",
__func__,
lld(ctx->pos));
if ((ctx->pos & PART_MASK) == 0)
return -EIO;
}
r = 0;
/*
* we ran all the way through the last batch, set up for
* getting another batch...
* Must read more if the user has sought past what has been read
* so far. Stop a user who has sought past the end.
*/
if (ret) {
*ptoken = readdir_response.token;
ctx->pos = ORANGEFS_ITERATE_NEXT;
while (od->token != ORANGEFS_ITERATE_END &&
ctx->pos > od->end) {
r = orangefs_dir_more(oi, od, dentry);
if (r)
return r;
}
if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
return -EIO;
/* Then try to fill if there's any left in the buffer. */
if (ctx->pos < od->end) {
r = orangefs_dir_fill(oi, od, dentry, ctx);
if (r)
return r;
}
/*
* Did we hit the end of the directory?
*/
if (readdir_response.token == ORANGEFS_READDIR_END) {
gossip_debug(GOSSIP_DIR_DEBUG,
"End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
ctx->pos = ORANGEFS_READDIR_END;
/* Finally get some more and try to fill. */
if (od->token != ORANGEFS_ITERATE_END) {
r = orangefs_dir_more(oi, od, dentry);
if (r)
return r;
r = orangefs_dir_fill(oi, od, dentry, ctx);
}
out_destroy_handle:
/* kfree(NULL) is safe */
kfree(readdir_response.dirent_array);
out_vfree:
gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf);
vfree(dents_buf);
out_free_op:
op_release(new_op);
gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
return ret;
return r;
}
static int orangefs_dir_open(struct inode *inode, struct file *file)
{
__u64 *ptoken;
file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
struct orangefs_dir *od;
file->private_data = kmalloc(sizeof(struct orangefs_dir),
GFP_KERNEL);
if (!file->private_data)
return -ENOMEM;
ptoken = file->private_data;
*ptoken = ORANGEFS_READDIR_START;
od = file->private_data;
od->token = ORANGEFS_ITERATE_START;
od->part = NULL;
od->end = 1 << PART_SHIFT;
od->error = 0;
return 0;
}
static int orangefs_dir_release(struct inode *inode, struct file *file)
{
struct orangefs_dir *od = file->private_data;
struct orangefs_dir_part *part = od->part;
orangefs_flush_inode(inode);
kfree(file->private_data);
while (part) {
struct orangefs_dir_part *next = part->next;
vfree(part);
part = next;
}
kfree(od);
return 0;
}
/** ORANGEFS implementation of VFS directory operations */
const struct file_operations orangefs_dir_operations = {
.llseek = orangefs_dir_llseek,
.read = generic_read_dir,
.iterate = orangefs_readdir,
.iterate = orangefs_dir_iterate,
.open = orangefs_dir_open,
.release = orangefs_dir_release,
.release = orangefs_dir_release
};
......@@ -40,16 +40,6 @@ struct orangefs_mkdir_response {
struct orangefs_object_kref refn;
};
/*
* duplication of some system interface structures so that I don't have
* to allocate extra memory
*/
struct orangefs_dirent {
char *d_name;
int d_length;
struct orangefs_khandle khandle;
};
struct orangefs_statfs_response {
__s64 block_size;
__s64 blocks_total;
......@@ -131,12 +121,16 @@ struct orangefs_downcall_s {
} resp;
};
/*
* The readdir response comes in the trailer. It is followed by the
* directory entries as described in dir.c.
*/
struct orangefs_readdir_response_s {
__u64 token;
__u64 directory_version;
__u32 __pad2;
__u32 orangefs_dirent_outcount;
struct orangefs_dirent *dirent_array;
};
#endif /* __DOWNCALL_H */
......@@ -474,7 +474,8 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite
/* Make sure generic_write_checks sees an up to date inode size. */
if (file->f_flags & O_APPEND) {
rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1);
rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1,
STATX_SIZE);
if (rc == -ESTALE)
rc = -EIO;
if (rc) {
......@@ -692,7 +693,8 @@ static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin)
* NOTE: We are only interested in file size here,
* so we set mask accordingly.
*/
ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1);
ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1,
STATX_SIZE);
if (ret == -ESTALE)
ret = -EIO;
if (ret) {
......
......@@ -161,7 +161,7 @@ static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr)
iattr->ia_size);
/* Ensure that we have a up to date size, so we know if it changed. */
ret = orangefs_inode_getattr(inode, 0, 1);
ret = orangefs_inode_getattr(inode, 0, 1, STATX_SIZE);
if (ret == -ESTALE)
ret = -EIO;
if (ret) {
......@@ -218,8 +218,7 @@ int orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
if (ret)
goto out;
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(inode)) {
if (iattr->ia_valid & ATTR_SIZE) {
ret = orangefs_setattr_size(inode, iattr);
if (ret)
goto out;
......@@ -256,13 +255,19 @@ int orangefs_getattr(const struct path *path, struct kstat *stat,
"orangefs_getattr: called on %pd\n",
path->dentry);
ret = orangefs_inode_getattr(inode, 0, 0);
ret = orangefs_inode_getattr(inode, 0, 0, request_mask);
if (ret == 0) {
generic_fillattr(inode, stat);
/* override block size reported to stat */
orangefs_inode = ORANGEFS_I(inode);
stat->blksize = orangefs_inode->blksize;
if (request_mask & STATX_SIZE)
stat->result_mask = STATX_BASIC_STATS;
else
stat->result_mask = STATX_BASIC_STATS &
~STATX_SIZE;
}
return ret;
}
......@@ -277,7 +282,7 @@ int orangefs_permission(struct inode *inode, int mask)
gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__);
/* Make sure the permission (and other common attrs) are up to date. */
ret = orangefs_inode_getattr(inode, 0, 0);
ret = orangefs_inode_getattr(inode, 0, 0, STATX_MODE);
if (ret < 0)
return ret;
......@@ -375,7 +380,7 @@ struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref
if (!inode || !(inode->i_state & I_NEW))
return inode;
error = orangefs_inode_getattr(inode, 1, 1);
error = orangefs_inode_getattr(inode, 1, 1, STATX_ALL);
if (error) {
iget_failed(inode);
return ERR_PTR(error);
......@@ -420,7 +425,7 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir,
orangefs_set_inode(inode, ref);
inode->i_ino = hash; /* needed for stat etc */
error = orangefs_inode_getattr(inode, 1, 1);
error = orangefs_inode_getattr(inode, 1, 1, STATX_ALL);
if (error)
goto out_iput;
......
......@@ -74,6 +74,7 @@ static int orangefs_create(struct inode *dir,
unlock_new_inode(inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
gossip_debug(GOSSIP_NAME_DEBUG,
"%s: dentry instantiated for %pd\n",
......@@ -193,8 +194,6 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
gossip_debug(GOSSIP_NAME_DEBUG,
"%s:%s:%d "
"Found good inode [%lu] with count [%d]\n",
......@@ -324,6 +323,7 @@ static int orangefs_symlink(struct inode *dir,
unlock_new_inode(inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
gossip_debug(GOSSIP_NAME_DEBUG,
"Inode (Symlink) %pU -> %pd\n",
......@@ -388,6 +388,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
unlock_new_inode(inode);
orangefs_set_timeout(dentry);
ORANGEFS_I(inode)->getattr_time = jiffies - 1;
ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS;
gossip_debug(GOSSIP_NAME_DEBUG,
"Inode (Directory) %pU -> %pd\n",
......
......@@ -440,6 +440,9 @@ static ssize_t orangefs_debug_write(struct file *file,
"orangefs_debug_write: %pD\n",
file);
if (count == 0)
return 0;
/*
* Thwart users who try to jamb a ridiculous number
* of bytes into the debug file...
......
......@@ -52,12 +52,7 @@
*/
#define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000800
/*
* The maximum number of directory entries in a single request is 96.
* XXX: Why can this not be higher. The client-side code can handle up to 512.
* XXX: What happens if we expect more than the client can return?
*/
#define ORANGEFS_MAX_DIRENT_COUNT_READDIR 96
#define ORANGEFS_MAX_DIRENT_COUNT_READDIR 512
#include "upcall.h"
#include "downcall.h"
......
......@@ -215,6 +215,7 @@ struct orangefs_inode_s {
unsigned long pinode_flags;
unsigned long getattr_time;
u32 getattr_mask;
};
#define P_ATIME_FLAG 0
......@@ -340,11 +341,6 @@ static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode)
return &(ORANGEFS_I(inode)->refn.khandle);
}
static inline __s32 get_fsid_from_ino(struct inode *inode)
{
return ORANGEFS_I(inode)->refn.fs_id;
}
static inline ino_t get_ino_from_khandle(struct inode *inode)
{
struct orangefs_khandle *khandle;
......@@ -500,7 +496,8 @@ int orangefs_inode_setxattr(struct inode *inode,
size_t size,
int flags);
int orangefs_inode_getattr(struct inode *inode, int new, int bypass);
int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
u32 request_mask);
int orangefs_inode_check_changed(struct inode *inode);
......
......@@ -251,7 +251,8 @@ static int orangefs_inode_is_stale(struct inode *inode, int new,
return 0;
}
int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
u32 request_mask)
{
struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
struct orangefs_kernel_op_s *new_op;
......@@ -262,7 +263,13 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
get_khandle_from_ino(inode));
if (!new && !bypass) {
if (time_before(jiffies, orangefs_inode->getattr_time))
/*
* Must have all the attributes in the mask and be within cache
* time.
*/
if ((request_mask & orangefs_inode->getattr_mask) ==
request_mask &&
time_before(jiffies, orangefs_inode->getattr_time))
return 0;
}
......@@ -270,7 +277,15 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
if (!new_op)
return -ENOMEM;
new_op->upcall.req.getattr.refn = orangefs_inode->refn;
/*
* Size is the hardest attribute to get. The incremental cost of any
* other attribute is essentially zero.
*/
if (request_mask & STATX_SIZE || new)
new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
else
new_op->upcall.req.getattr.mask =
ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
ret = service_operation(new_op, __func__,
get_interruptible_flag(inode));
......@@ -291,6 +306,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
case S_IFREG:
inode->i_flags = orangefs_inode_flags(&new_op->
downcall.resp.getattr.attributes);
if (request_mask & STATX_SIZE || new) {
inode_size = (loff_t)new_op->
downcall.resp.getattr.attributes.size;
rounded_up_size =
......@@ -303,13 +319,16 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
inode->i_blocks =
(unsigned long)(rounded_up_size / 512);
spin_unlock(&inode->i_lock);
}
break;
case S_IFDIR:
if (request_mask & STATX_SIZE || new) {
inode->i_size = PAGE_SIZE;
orangefs_inode->blksize = i_blocksize(inode);
spin_lock(&inode->i_lock);
inode_set_bytes(inode, inode->i_size);
spin_unlock(&inode->i_lock);
}
set_nlink(inode, 1);
break;
case S_IFLNK:
......@@ -349,6 +368,10 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
orangefs_inode->getattr_time = jiffies +
orangefs_getattr_timeout_msecs*HZ/1000;
if (request_mask & STATX_SIZE || new)
orangefs_inode->getattr_mask = STATX_BASIC_STATS;
else
orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE;
ret = 0;
out:
op_release(new_op);
......@@ -500,41 +523,6 @@ int orangefs_flush_inode(struct inode *inode)
return ret;
}
int orangefs_unmount_sb(struct super_block *sb)
{
int ret = -EINVAL;
struct orangefs_kernel_op_s *new_op = NULL;
gossip_debug(GOSSIP_UTILS_DEBUG,
"orangefs_unmount_sb called on sb %p\n",
sb);
new_op = op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT);
if (!new_op)
return -ENOMEM;
new_op->upcall.req.fs_umount.id = ORANGEFS_SB(sb)->id;
new_op->upcall.req.fs_umount.fs_id = ORANGEFS_SB(sb)->fs_id;
strncpy(new_op->upcall.req.fs_umount.orangefs_config_server,
ORANGEFS_SB(sb)->devname,
ORANGEFS_MAX_SERVER_ADDR_LEN);
gossip_debug(GOSSIP_UTILS_DEBUG,
"Attempting ORANGEFS Unmount via host %s\n",
new_op->upcall.req.fs_umount.orangefs_config_server);
ret = service_operation(new_op, "orangefs_fs_umount", 0);
gossip_debug(GOSSIP_UTILS_DEBUG,
"orangefs_unmount: got return value of %d\n", ret);
if (ret)
sb = ERR_PTR(ret);
else
ORANGEFS_SB(sb)->mount_pending = 1;
op_release(new_op);
return ret;
}
void orangefs_make_bad_inode(struct inode *inode)
{
if (is_root_handle(inode)) {
......
......@@ -138,13 +138,8 @@ typedef __s64 ORANGEFS_offset;
#define ORANGEFS_G_SGID (1 << 10)
#define ORANGEFS_U_SUID (1 << 11)
/* definition taken from stdint.h */
#define INT32_MAX (2147483647)
#define ORANGEFS_ITERATE_START (INT32_MAX - 1)
#define ORANGEFS_ITERATE_END (INT32_MAX - 2)
#define ORANGEFS_ITERATE_NEXT (INT32_MAX - 3)
#define ORANGEFS_READDIR_START ORANGEFS_ITERATE_START
#define ORANGEFS_READDIR_END ORANGEFS_ITERATE_END
#define ORANGEFS_ITERATE_START 2147483646
#define ORANGEFS_ITERATE_END 2147483645
#define ORANGEFS_IMMUTABLE_FL FS_IMMUTABLE_FL
#define ORANGEFS_APPEND_FL FS_APPEND_FL
#define ORANGEFS_NOATIME_FL FS_NOATIME_FL
......
......@@ -376,6 +376,25 @@ static const struct export_operations orangefs_export_ops = {
.fh_to_dentry = orangefs_fh_to_dentry,
};
static int orangefs_unmount(int id, __s32 fs_id, const char *devname)
{
struct orangefs_kernel_op_s *op;
int r;
op = op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT);
if (!op)
return -ENOMEM;
op->upcall.req.fs_umount.id = id;
op->upcall.req.fs_umount.fs_id = fs_id;
strncpy(op->upcall.req.fs_umount.orangefs_config_server,
devname, ORANGEFS_MAX_SERVER_ADDR_LEN);
r = service_operation(op, "orangefs_fs_umount", 0);
/* Not much to do about an error here. */
if (r)
gossip_err("orangefs_unmount: service_operation %d\n", r);
op_release(op);
return r;
}
static int orangefs_fill_sb(struct super_block *sb,
struct orangefs_fs_mount_response *fs_mount,
void *data, int silent)
......@@ -484,6 +503,8 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
if (IS_ERR(sb)) {
d = ERR_CAST(sb);
orangefs_unmount(new_op->downcall.resp.fs_mount.id,
new_op->downcall.resp.fs_mount.fs_id, devname);
goto free_op;
}
......@@ -539,6 +560,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
free_sb_and_op:
/* Will call orangefs_kill_sb with sb not in list. */
ORANGEFS_SB(sb)->no_list = 1;
/* ORANGEFS_VFS_OP_FS_UMOUNT is done by orangefs_kill_sb. */
deactivate_locked_super(sb);
free_op:
gossip_err("orangefs_mount: mount request failed with %d\n", ret);
......@@ -554,6 +576,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
void orangefs_kill_sb(struct super_block *sb)
{
int r;
gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_kill_sb: called\n");
/* provided sb cleanup */
......@@ -563,7 +586,10 @@ void orangefs_kill_sb(struct super_block *sb)
* issue the unmount to userspace to tell it to remove the
* dynamic mount info it has for this superblock
*/
orangefs_unmount_sb(sb);
r = orangefs_unmount(ORANGEFS_SB(sb)->id, ORANGEFS_SB(sb)->fs_id,
ORANGEFS_SB(sb)->devname);
if (!r)
ORANGEFS_SB(sb)->mount_pending = 1;
if (!ORANGEFS_SB(sb)->no_list) {
/* remove the sb from our list of orangefs specific sb's */
......
......@@ -124,6 +124,13 @@ int service_operation(struct orangefs_kernel_op_s *op,
gossip_debug(GOSSIP_WAIT_DEBUG,
"%s:client core is NOT in service.\n",
__func__);
/*
* Don't wait for the userspace component to return if
* the filesystem is being umounted anyway.
*/
if (op->upcall.type == ORANGEFS_VFS_OP_FS_UMOUNT)
timeout = 0;
else
timeout = op_timeout_secs * HZ;
}
spin_unlock(&orangefs_request_list_lock);
......
......@@ -76,11 +76,8 @@ ssize_t orangefs_inode_getxattr(struct inode *inode, const char *name,
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) {
gossip_err("Invalid key length (%d)\n",
(int)strlen(name));
if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
return -EINVAL;
}
fsuid = from_kuid(&init_user_ns, current_fsuid());
fsgid = from_kgid(&init_user_ns, current_fsgid());
......@@ -172,6 +169,9 @@ static int orangefs_inode_removexattr(struct inode *inode, const char *name,
struct orangefs_kernel_op_s *new_op = NULL;
int ret = -ENOMEM;
if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
return -EINVAL;
down_write(&orangefs_inode->xattr_sem);
new_op = op_alloc(ORANGEFS_VFS_OP_REMOVEXATTR);
if (!new_op)
......@@ -231,23 +231,13 @@ int orangefs_inode_setxattr(struct inode *inode, const char *name,
"%s: name %s, buffer_size %zd\n",
__func__, name, size);
if (size >= ORANGEFS_MAX_XATTR_VALUELEN ||
flags < 0) {
gossip_err("orangefs_inode_setxattr: bogus values of size(%d), flags(%d)\n",
(int)size,
flags);
if (size > ORANGEFS_MAX_XATTR_VALUELEN)
return -EINVAL;
if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN)
return -EINVAL;
}
internal_flag = convert_to_internal_xattr_flags(flags);
if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) {
gossip_err
("orangefs_inode_setxattr: bogus key size (%d)\n",
(int)(strlen(name)));
return -EINVAL;
}
/* This is equivalent to a removexattr */
if (size == 0 && value == NULL) {
gossip_debug(GOSSIP_XATTR_DEBUG,
......@@ -358,7 +348,7 @@ ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size)
returned_count = new_op->downcall.resp.listxattr.returned_count;
if (returned_count < 0 ||
returned_count >= ORANGEFS_MAX_XATTR_LISTLEN) {
returned_count > ORANGEFS_MAX_XATTR_LISTLEN) {
gossip_err("%s: impossible value for returned_count:%d:\n",
__func__,
returned_count);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment