Commit 76db8ac4 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: fix readdir EOVERFLOW on 32-bit archs
  ceph: fix frag offset for non-leftmost frags
  ceph: fix dangling pointer
  ceph: explicitly specify page alignment in network messages
  ceph: make page alignment explicit in osd interface
  ceph: fix comment, remove extraneous args
  ceph: fix update of ctime from MDS
  ceph: fix version check on racing inode updates
  ceph: fix uid/gid on resent mds requests
  ceph: fix rdcache_gen usage and invalidate
  ceph: re-request max_size if cap auth changes
  ceph: only let auth caps update max_size
  ceph: fix open for write on clustered mds
  ceph: fix bad pointer dereference in ceph_fill_trace
  ceph: fix small seq message skipping
  Revert "ceph: update issue_seq on cap grant"
parents caf83945 3105c19c
...@@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) ...@@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
page->index << PAGE_CACHE_SHIFT, &len, page->index << PAGE_CACHE_SHIFT, &len,
ci->i_truncate_seq, ci->i_truncate_size, ci->i_truncate_seq, ci->i_truncate_size,
&page, 1); &page, 1, 0);
if (err == -ENOENT) if (err == -ENOENT)
err = 0; err = 0;
if (err < 0) { if (err < 0) {
...@@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, ...@@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
offset, &len, offset, &len,
ci->i_truncate_seq, ci->i_truncate_size, ci->i_truncate_seq, ci->i_truncate_size,
pages, nr_pages); pages, nr_pages, 0);
if (rc == -ENOENT) if (rc == -ENOENT)
rc = 0; rc = 0;
if (rc < 0) if (rc < 0)
...@@ -774,7 +774,7 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -774,7 +774,7 @@ static int ceph_writepages_start(struct address_space *mapping,
snapc, do_sync, snapc, do_sync,
ci->i_truncate_seq, ci->i_truncate_seq,
ci->i_truncate_size, ci->i_truncate_size,
&inode->i_mtime, true, 1); &inode->i_mtime, true, 1, 0);
max_pages = req->r_num_pages; max_pages = req->r_num_pages;
alloc_page_vec(fsc, req); alloc_page_vec(fsc, req);
......
...@@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode) ...@@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode)
invalidating_gen == ci->i_rdcache_gen) { invalidating_gen == ci->i_rdcache_gen) {
/* success. */ /* success. */
dout("try_nonblocking_invalidate %p success\n", inode); dout("try_nonblocking_invalidate %p success\n", inode);
ci->i_rdcache_gen = 0; /* save any racing async invalidate some trouble */
ci->i_rdcache_revoking = 0; ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
return 0; return 0;
} }
dout("try_nonblocking_invalidate %p failed\n", inode); dout("try_nonblocking_invalidate %p failed\n", inode);
...@@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, ...@@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds; int mds = session->s_mds;
unsigned seq = le32_to_cpu(grant->seq); int seq = le32_to_cpu(grant->seq);
unsigned issue_seq = le32_to_cpu(grant->issue_seq);
int newcaps = le32_to_cpu(grant->caps); int newcaps = le32_to_cpu(grant->caps);
int issued, implemented, used, wanted, dirty; int issued, implemented, used, wanted, dirty;
u64 size = le64_to_cpu(grant->size); u64 size = le64_to_cpu(grant->size);
...@@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, ...@@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
int revoked_rdcache = 0; int revoked_rdcache = 0;
int queue_invalidate = 0; int queue_invalidate = 0;
dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); inode, cap, mds, seq, ceph_cap_string(newcaps));
dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
inode->i_size); inode->i_size);
...@@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, ...@@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
} }
cap->seq = seq; cap->seq = seq;
cap->issue_seq = issue_seq;
/* file layout may have changed */ /* file layout may have changed */
ci->i_layout = grant->layout; ci->i_layout = grant->layout;
...@@ -2691,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, ...@@ -2691,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
NULL /* no caps context */); NULL /* no caps context */);
try_flush_caps(inode, session, NULL); try_flush_caps(inode, session, NULL);
up_read(&mdsc->snap_rwsem); up_read(&mdsc->snap_rwsem);
/* make sure we re-request max_size, if necessary */
spin_lock(&inode->i_lock);
ci->i_requested_max_size = 0;
spin_unlock(&inode->i_lock);
} }
/* /*
......
...@@ -336,7 +336,10 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) ...@@ -336,7 +336,10 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
if (req->r_reply_info.dir_end) { if (req->r_reply_info.dir_end) {
kfree(fi->last_name); kfree(fi->last_name);
fi->last_name = NULL; fi->last_name = NULL;
fi->next_offset = 2; if (ceph_frag_is_rightmost(frag))
fi->next_offset = 2;
else
fi->next_offset = 0;
} else { } else {
rinfo = &req->r_reply_info; rinfo = &req->r_reply_info;
err = note_last_dentry(fi, err = note_last_dentry(fi,
...@@ -355,18 +358,22 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) ...@@ -355,18 +358,22 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
u64 pos = ceph_make_fpos(frag, off); u64 pos = ceph_make_fpos(frag, off);
struct ceph_mds_reply_inode *in = struct ceph_mds_reply_inode *in =
rinfo->dir_in[off - fi->offset].in; rinfo->dir_in[off - fi->offset].in;
struct ceph_vino vino;
ino_t ino;
dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
off, off - fi->offset, rinfo->dir_nr, pos, off, off - fi->offset, rinfo->dir_nr, pos,
rinfo->dir_dname_len[off - fi->offset], rinfo->dir_dname_len[off - fi->offset],
rinfo->dir_dname[off - fi->offset], in); rinfo->dir_dname[off - fi->offset], in);
BUG_ON(!in); BUG_ON(!in);
ftype = le32_to_cpu(in->mode) >> 12; ftype = le32_to_cpu(in->mode) >> 12;
vino.ino = le64_to_cpu(in->ino);
vino.snap = le64_to_cpu(in->snapid);
ino = ceph_vino_to_ino(vino);
if (filldir(dirent, if (filldir(dirent,
rinfo->dir_dname[off - fi->offset], rinfo->dir_dname[off - fi->offset],
rinfo->dir_dname_len[off - fi->offset], rinfo->dir_dname_len[off - fi->offset],
pos, pos, ino, ftype) < 0) {
le64_to_cpu(in->ino),
ftype) < 0) {
dout("filldir stopping us...\n"); dout("filldir stopping us...\n");
return 0; return 0;
} }
...@@ -414,6 +421,7 @@ static void reset_readdir(struct ceph_file_info *fi) ...@@ -414,6 +421,7 @@ static void reset_readdir(struct ceph_file_info *fi)
fi->last_readdir = NULL; fi->last_readdir = NULL;
} }
kfree(fi->last_name); kfree(fi->last_name);
fi->last_name = NULL;
fi->next_offset = 2; /* compensate for . and .. */ fi->next_offset = 2; /* compensate for . and .. */
if (fi->dentry) { if (fi->dentry) {
dput(fi->dentry); dput(fi->dentry);
......
...@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file) ...@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file)
} }
/* /*
* No need to block if we have any caps. Update wanted set * No need to block if we have caps on the auth MDS (for
* write) or any MDS (for read). Update wanted set
* asynchronously. * asynchronously.
*/ */
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (__ceph_is_any_real_caps(ci)) { if (__ceph_is_any_real_caps(ci) &&
(((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
int mds_wanted = __ceph_caps_mds_wanted(ci); int mds_wanted = __ceph_caps_mds_wanted(ci);
int issued = __ceph_caps_issued(ci, NULL); int issued = __ceph_caps_issued(ci, NULL);
...@@ -280,11 +282,12 @@ int ceph_release(struct inode *inode, struct file *file) ...@@ -280,11 +282,12 @@ int ceph_release(struct inode *inode, struct file *file)
static int striped_read(struct inode *inode, static int striped_read(struct inode *inode,
u64 off, u64 len, u64 off, u64 len,
struct page **pages, int num_pages, struct page **pages, int num_pages,
int *checkeof) int *checkeof, bool align_to_pages)
{ {
struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
u64 pos, this_len; u64 pos, this_len;
int io_align, page_align;
int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
int left, pages_left; int left, pages_left;
int read; int read;
...@@ -300,14 +303,19 @@ static int striped_read(struct inode *inode, ...@@ -300,14 +303,19 @@ static int striped_read(struct inode *inode,
page_pos = pages; page_pos = pages;
pages_left = num_pages; pages_left = num_pages;
read = 0; read = 0;
io_align = off & ~PAGE_MASK;
more: more:
if (align_to_pages)
page_align = (pos - io_align) & ~PAGE_MASK;
else
page_align = pos & ~PAGE_MASK;
this_len = left; this_len = left;
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, pos, &this_len, &ci->i_layout, pos, &this_len,
ci->i_truncate_seq, ci->i_truncate_seq,
ci->i_truncate_size, ci->i_truncate_size,
page_pos, pages_left); page_pos, pages_left, page_align);
hit_stripe = this_len < left; hit_stripe = this_len < left;
was_short = ret >= 0 && ret < this_len; was_short = ret >= 0 && ret < this_len;
if (ret == -ENOENT) if (ret == -ENOENT)
...@@ -374,26 +382,25 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, ...@@ -374,26 +382,25 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
dout("sync_read on file %p %llu~%u %s\n", file, off, len, dout("sync_read on file %p %llu~%u %s\n", file, off, len,
(file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
if (file->f_flags & O_DIRECT) { if (file->f_flags & O_DIRECT)
pages = ceph_get_direct_page_vector(data, num_pages, off, len); pages = ceph_get_direct_page_vector(data, num_pages);
else
/*
* flush any page cache pages in this range. this
* will make concurrent normal and O_DIRECT io slow,
* but it will at least behave sensibly when they are
* in sequence.
*/
} else {
pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
}
if (IS_ERR(pages)) if (IS_ERR(pages))
return PTR_ERR(pages); return PTR_ERR(pages);
/*
* flush any page cache pages in this range. this
* will make concurrent normal and sync io slow,
* but it will at least behave sensibly when they are
* in sequence.
*/
ret = filemap_write_and_wait(inode->i_mapping); ret = filemap_write_and_wait(inode->i_mapping);
if (ret < 0) if (ret < 0)
goto done; goto done;
ret = striped_read(inode, off, len, pages, num_pages, checkeof); ret = striped_read(inode, off, len, pages, num_pages, checkeof,
file->f_flags & O_DIRECT);
if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
ret = ceph_copy_page_vector_to_user(pages, data, off, ret); ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
...@@ -448,6 +455,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, ...@@ -448,6 +455,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
int flags; int flags;
int do_sync = 0; int do_sync = 0;
int check_caps = 0; int check_caps = 0;
int page_align, io_align;
int ret; int ret;
struct timespec mtime = CURRENT_TIME; struct timespec mtime = CURRENT_TIME;
...@@ -462,6 +470,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, ...@@ -462,6 +470,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
else else
pos = *offset; pos = *offset;
io_align = pos & ~PAGE_MASK;
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -486,20 +496,26 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, ...@@ -486,20 +496,26 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
*/ */
more: more:
len = left; len = left;
if (file->f_flags & O_DIRECT)
/* write from beginning of first page, regardless of
io alignment */
page_align = (pos - io_align) & ~PAGE_MASK;
else
page_align = pos & ~PAGE_MASK;
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), pos, &len, ceph_vino(inode), pos, &len,
CEPH_OSD_OP_WRITE, flags, CEPH_OSD_OP_WRITE, flags,
ci->i_snap_realm->cached_context, ci->i_snap_realm->cached_context,
do_sync, do_sync,
ci->i_truncate_seq, ci->i_truncate_size, ci->i_truncate_seq, ci->i_truncate_size,
&mtime, false, 2); &mtime, false, 2, page_align);
if (!req) if (!req)
return -ENOMEM; return -ENOMEM;
num_pages = calc_pages_for(pos, len); num_pages = calc_pages_for(pos, len);
if (file->f_flags & O_DIRECT) { if (file->f_flags & O_DIRECT) {
pages = ceph_get_direct_page_vector(data, num_pages, pos, len); pages = ceph_get_direct_page_vector(data, num_pages);
if (IS_ERR(pages)) { if (IS_ERR(pages)) {
ret = PTR_ERR(pages); ret = PTR_ERR(pages);
goto out; goto out;
......
...@@ -470,7 +470,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, ...@@ -470,7 +470,9 @@ void ceph_fill_file_time(struct inode *inode, int issued,
if (issued & (CEPH_CAP_FILE_EXCL| if (issued & (CEPH_CAP_FILE_EXCL|
CEPH_CAP_FILE_WR| CEPH_CAP_FILE_WR|
CEPH_CAP_FILE_BUFFER)) { CEPH_CAP_FILE_BUFFER|
CEPH_CAP_AUTH_EXCL|
CEPH_CAP_XATTR_EXCL)) {
if (timespec_compare(ctime, &inode->i_ctime) > 0) { if (timespec_compare(ctime, &inode->i_ctime) > 0) {
dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
...@@ -510,7 +512,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, ...@@ -510,7 +512,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
warn = 1; warn = 1;
} }
} else { } else {
/* we have no write caps; whatever the MDS says is true */ /* we have no write|excl caps; whatever the MDS says is true */
if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
inode->i_ctime = *ctime; inode->i_ctime = *ctime;
inode->i_mtime = *mtime; inode->i_mtime = *mtime;
...@@ -566,12 +568,17 @@ static int fill_inode(struct inode *inode, ...@@ -566,12 +568,17 @@ static int fill_inode(struct inode *inode,
/* /*
* provided version will be odd if inode value is projected, * provided version will be odd if inode value is projected,
* even if stable. skip the update if we have a newer info * even if stable. skip the update if we have newer stable
* (e.g., due to inode info racing form multiple MDSs), or if * info (ours>=theirs, e.g. due to racing mds replies), unless
* we are getting projected (unstable) inode info. * we are getting projected (unstable) info (in which case the
* version is odd, and we want ours>theirs).
* us them
* 2 2 skip
* 3 2 skip
* 3 3 update
*/ */
if (le64_to_cpu(info->version) > 0 && if (le64_to_cpu(info->version) > 0 &&
(ci->i_version & ~1) > le64_to_cpu(info->version)) (ci->i_version & ~1) >= le64_to_cpu(info->version))
goto no_change; goto no_change;
issued = __ceph_caps_issued(ci, &implemented); issued = __ceph_caps_issued(ci, &implemented);
...@@ -605,7 +612,14 @@ static int fill_inode(struct inode *inode, ...@@ -605,7 +612,14 @@ static int fill_inode(struct inode *inode,
le32_to_cpu(info->time_warp_seq), le32_to_cpu(info->time_warp_seq),
&ctime, &mtime, &atime); &ctime, &mtime, &atime);
ci->i_max_size = le64_to_cpu(info->max_size); /* only update max_size on auth cap */
if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
ci->i_max_size != le64_to_cpu(info->max_size)) {
dout("max_size %lld -> %llu\n", ci->i_max_size,
le64_to_cpu(info->max_size));
ci->i_max_size = le64_to_cpu(info->max_size);
}
ci->i_layout = info->layout; ci->i_layout = info->layout;
inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
...@@ -1054,7 +1068,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, ...@@ -1054,7 +1068,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
ininfo = rinfo->targeti.in; ininfo = rinfo->targeti.in;
vino.ino = le64_to_cpu(ininfo->ino); vino.ino = le64_to_cpu(ininfo->ino);
vino.snap = le64_to_cpu(ininfo->snapid); vino.snap = le64_to_cpu(ininfo->snapid);
if (!dn->d_inode) { in = dn->d_inode;
if (!in) {
in = ceph_get_inode(sb, vino); in = ceph_get_inode(sb, vino);
if (IS_ERR(in)) { if (IS_ERR(in)) {
pr_err("fill_trace bad get_inode " pr_err("fill_trace bad get_inode "
...@@ -1385,11 +1400,8 @@ static void ceph_invalidate_work(struct work_struct *work) ...@@ -1385,11 +1400,8 @@ static void ceph_invalidate_work(struct work_struct *work)
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
dout("invalidate_pages %p gen %d revoking %d\n", inode, dout("invalidate_pages %p gen %d revoking %d\n", inode,
ci->i_rdcache_gen, ci->i_rdcache_revoking); ci->i_rdcache_gen, ci->i_rdcache_revoking);
if (ci->i_rdcache_gen == 0 || if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
ci->i_rdcache_revoking != ci->i_rdcache_gen) {
BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen);
/* nevermind! */ /* nevermind! */
ci->i_rdcache_revoking = 0;
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
goto out; goto out;
} }
...@@ -1399,15 +1411,16 @@ static void ceph_invalidate_work(struct work_struct *work) ...@@ -1399,15 +1411,16 @@ static void ceph_invalidate_work(struct work_struct *work)
ceph_invalidate_nondirty_pages(inode->i_mapping); ceph_invalidate_nondirty_pages(inode->i_mapping);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (orig_gen == ci->i_rdcache_gen) { if (orig_gen == ci->i_rdcache_gen &&
orig_gen == ci->i_rdcache_revoking) {
dout("invalidate_pages %p gen %d successful\n", inode, dout("invalidate_pages %p gen %d successful\n", inode,
ci->i_rdcache_gen); ci->i_rdcache_gen);
ci->i_rdcache_gen = 0; ci->i_rdcache_revoking--;
ci->i_rdcache_revoking = 0;
check = 1; check = 1;
} else { } else {
dout("invalidate_pages %p gen %d raced, gen now %d\n", dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
inode, orig_gen, ci->i_rdcache_gen); inode, orig_gen, ci->i_rdcache_gen,
ci->i_rdcache_revoking);
} }
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
...@@ -1738,7 +1751,7 @@ int ceph_do_getattr(struct inode *inode, int mask) ...@@ -1738,7 +1751,7 @@ int ceph_do_getattr(struct inode *inode, int mask)
return 0; return 0;
} }
dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask)); dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
return 0; return 0;
......
...@@ -528,6 +528,9 @@ static void __register_request(struct ceph_mds_client *mdsc, ...@@ -528,6 +528,9 @@ static void __register_request(struct ceph_mds_client *mdsc,
ceph_mdsc_get_request(req); ceph_mdsc_get_request(req);
__insert_request(mdsc, req); __insert_request(mdsc, req);
req->r_uid = current_fsuid();
req->r_gid = current_fsgid();
if (dir) { if (dir) {
struct ceph_inode_info *ci = ceph_inode(dir); struct ceph_inode_info *ci = ceph_inode(dir);
...@@ -1587,8 +1590,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, ...@@ -1587,8 +1590,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
head->op = cpu_to_le32(req->r_op); head->op = cpu_to_le32(req->r_op);
head->caller_uid = cpu_to_le32(current_fsuid()); head->caller_uid = cpu_to_le32(req->r_uid);
head->caller_gid = cpu_to_le32(current_fsgid()); head->caller_gid = cpu_to_le32(req->r_gid);
head->args = req->r_args; head->args = req->r_args;
ceph_encode_filepath(&p, end, ino1, path1); ceph_encode_filepath(&p, end, ino1, path1);
......
...@@ -170,6 +170,8 @@ struct ceph_mds_request { ...@@ -170,6 +170,8 @@ struct ceph_mds_request {
union ceph_mds_request_args r_args; union ceph_mds_request_args r_args;
int r_fmode; /* file mode, if expecting cap */ int r_fmode; /* file mode, if expecting cap */
uid_t r_uid;
gid_t r_gid;
/* for choosing which mds to send this request to */ /* for choosing which mds to send this request to */
int r_direct_mode; int r_direct_mode;
......
...@@ -293,9 +293,7 @@ struct ceph_inode_info { ...@@ -293,9 +293,7 @@ struct ceph_inode_info {
int i_rd_ref, i_rdcache_ref, i_wr_ref; int i_rd_ref, i_rdcache_ref, i_wr_ref;
int i_wrbuffer_ref, i_wrbuffer_ref_head; int i_wrbuffer_ref, i_wrbuffer_ref_head;
u32 i_shared_gen; /* increment each time we get FILE_SHARED */ u32 i_shared_gen; /* increment each time we get FILE_SHARED */
u32 i_rdcache_gen; /* we increment this each time we get u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
FILE_CACHE. If it's non-zero, we
_may_ have cached pages. */
u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
struct list_head i_unsafe_writes; /* uncommitted sync writes */ struct list_head i_unsafe_writes; /* uncommitted sync writes */
......
...@@ -227,8 +227,7 @@ extern int ceph_open_session(struct ceph_client *client); ...@@ -227,8 +227,7 @@ extern int ceph_open_session(struct ceph_client *client);
extern void ceph_release_page_vector(struct page **pages, int num_pages); extern void ceph_release_page_vector(struct page **pages, int num_pages);
extern struct page **ceph_get_direct_page_vector(const char __user *data, extern struct page **ceph_get_direct_page_vector(const char __user *data,
int num_pages, int num_pages);
loff_t off, size_t len);
extern void ceph_put_page_vector(struct page **pages, int num_pages); extern void ceph_put_page_vector(struct page **pages, int num_pages);
extern void ceph_release_page_vector(struct page **pages, int num_pages); extern void ceph_release_page_vector(struct page **pages, int num_pages);
extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
......
...@@ -82,6 +82,7 @@ struct ceph_msg { ...@@ -82,6 +82,7 @@ struct ceph_msg {
struct ceph_buffer *middle; struct ceph_buffer *middle;
struct page **pages; /* data payload. NOT OWNER. */ struct page **pages; /* data payload. NOT OWNER. */
unsigned nr_pages; /* size of page array */ unsigned nr_pages; /* size of page array */
unsigned page_alignment; /* io offset in first page */
struct ceph_pagelist *pagelist; /* instead of pages */ struct ceph_pagelist *pagelist; /* instead of pages */
struct list_head list_head; struct list_head list_head;
struct kref kref; struct kref kref;
......
...@@ -79,6 +79,7 @@ struct ceph_osd_request { ...@@ -79,6 +79,7 @@ struct ceph_osd_request {
struct ceph_file_layout r_file_layout; struct ceph_file_layout r_file_layout;
struct ceph_snap_context *r_snapc; /* snap context for writes */ struct ceph_snap_context *r_snapc; /* snap context for writes */
unsigned r_num_pages; /* size of page array (follows) */ unsigned r_num_pages; /* size of page array (follows) */
unsigned r_page_alignment; /* io offset in first page */
struct page **r_pages; /* pages for data payload */ struct page **r_pages; /* pages for data payload */
int r_pages_from_pool; int r_pages_from_pool;
int r_own_pages; /* if true, i own page list */ int r_own_pages; /* if true, i own page list */
...@@ -194,7 +195,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, ...@@ -194,7 +195,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
int do_sync, u32 truncate_seq, int do_sync, u32 truncate_seq,
u64 truncate_size, u64 truncate_size,
struct timespec *mtime, struct timespec *mtime,
bool use_mempool, int num_reply); bool use_mempool, int num_reply,
int page_align);
static inline void ceph_osdc_get_request(struct ceph_osd_request *req) static inline void ceph_osdc_get_request(struct ceph_osd_request *req)
{ {
...@@ -218,7 +220,8 @@ extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, ...@@ -218,7 +220,8 @@ extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
struct ceph_file_layout *layout, struct ceph_file_layout *layout,
u64 off, u64 *plen, u64 off, u64 *plen,
u32 truncate_seq, u64 truncate_size, u32 truncate_seq, u64 truncate_size,
struct page **pages, int nr_pages); struct page **pages, int nr_pages,
int page_align);
extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
struct ceph_vino vino, struct ceph_vino vino,
......
...@@ -540,8 +540,7 @@ static void prepare_write_message(struct ceph_connection *con) ...@@ -540,8 +540,7 @@ static void prepare_write_message(struct ceph_connection *con)
/* initialize page iterator */ /* initialize page iterator */
con->out_msg_pos.page = 0; con->out_msg_pos.page = 0;
if (m->pages) if (m->pages)
con->out_msg_pos.page_pos = con->out_msg_pos.page_pos = m->page_alignment;
le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK;
else else
con->out_msg_pos.page_pos = 0; con->out_msg_pos.page_pos = 0;
con->out_msg_pos.data_pos = 0; con->out_msg_pos.data_pos = 0;
...@@ -1491,7 +1490,7 @@ static int read_partial_message(struct ceph_connection *con) ...@@ -1491,7 +1490,7 @@ static int read_partial_message(struct ceph_connection *con)
struct ceph_msg *m = con->in_msg; struct ceph_msg *m = con->in_msg;
int ret; int ret;
int to, left; int to, left;
unsigned front_len, middle_len, data_len, data_off; unsigned front_len, middle_len, data_len;
int datacrc = con->msgr->nocrc; int datacrc = con->msgr->nocrc;
int skip; int skip;
u64 seq; u64 seq;
...@@ -1527,19 +1526,17 @@ static int read_partial_message(struct ceph_connection *con) ...@@ -1527,19 +1526,17 @@ static int read_partial_message(struct ceph_connection *con)
data_len = le32_to_cpu(con->in_hdr.data_len); data_len = le32_to_cpu(con->in_hdr.data_len);
if (data_len > CEPH_MSG_MAX_DATA_LEN) if (data_len > CEPH_MSG_MAX_DATA_LEN)
return -EIO; return -EIO;
data_off = le16_to_cpu(con->in_hdr.data_off);
/* verify seq# */ /* verify seq# */
seq = le64_to_cpu(con->in_hdr.seq); seq = le64_to_cpu(con->in_hdr.seq);
if ((s64)seq - (s64)con->in_seq < 1) { if ((s64)seq - (s64)con->in_seq < 1) {
pr_info("skipping %s%lld %s seq %lld, expected %lld\n", pr_info("skipping %s%lld %s seq %lld expected %lld\n",
ENTITY_NAME(con->peer_name), ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), ceph_pr_addr(&con->peer_addr.in_addr),
seq, con->in_seq + 1); seq, con->in_seq + 1);
con->in_base_pos = -front_len - middle_len - data_len - con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer); sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY; con->in_tag = CEPH_MSGR_TAG_READY;
con->in_seq++;
return 0; return 0;
} else if ((s64)seq - (s64)con->in_seq > 1) { } else if ((s64)seq - (s64)con->in_seq > 1) {
pr_err("read_partial_message bad seq %lld expected %lld\n", pr_err("read_partial_message bad seq %lld expected %lld\n",
...@@ -1576,7 +1573,7 @@ static int read_partial_message(struct ceph_connection *con) ...@@ -1576,7 +1573,7 @@ static int read_partial_message(struct ceph_connection *con)
con->in_msg_pos.page = 0; con->in_msg_pos.page = 0;
if (m->pages) if (m->pages)
con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; con->in_msg_pos.page_pos = m->page_alignment;
else else
con->in_msg_pos.page_pos = 0; con->in_msg_pos.page_pos = 0;
con->in_msg_pos.data_pos = 0; con->in_msg_pos.data_pos = 0;
...@@ -2301,6 +2298,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) ...@@ -2301,6 +2298,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
/* data */ /* data */
m->nr_pages = 0; m->nr_pages = 0;
m->page_alignment = 0;
m->pages = NULL; m->pages = NULL;
m->pagelist = NULL; m->pagelist = NULL;
m->bio = NULL; m->bio = NULL;
...@@ -2370,6 +2368,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, ...@@ -2370,6 +2368,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
type, front_len); type, front_len);
return NULL; return NULL;
} }
msg->page_alignment = le16_to_cpu(hdr->data_off);
} }
memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
......
...@@ -71,6 +71,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc, ...@@ -71,6 +71,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
op->extent.length = objlen; op->extent.length = objlen;
} }
req->r_num_pages = calc_pages_for(off, *plen); req->r_num_pages = calc_pages_for(off, *plen);
req->r_page_alignment = off & ~PAGE_MASK;
if (op->op == CEPH_OSD_OP_WRITE) if (op->op == CEPH_OSD_OP_WRITE)
op->payload_len = *plen; op->payload_len = *plen;
...@@ -390,6 +391,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, ...@@ -390,6 +391,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
req->r_request->hdr.data_len = cpu_to_le32(data_len); req->r_request->hdr.data_len = cpu_to_le32(data_len);
} }
req->r_request->page_alignment = req->r_page_alignment;
BUG_ON(p > msg->front.iov_base + msg->front.iov_len); BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
msg_size = p - msg->front.iov_base; msg_size = p - msg->front.iov_base;
msg->front.iov_len = msg_size; msg->front.iov_len = msg_size;
...@@ -419,7 +422,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, ...@@ -419,7 +422,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
u32 truncate_seq, u32 truncate_seq,
u64 truncate_size, u64 truncate_size,
struct timespec *mtime, struct timespec *mtime,
bool use_mempool, int num_reply) bool use_mempool, int num_reply,
int page_align)
{ {
struct ceph_osd_req_op ops[3]; struct ceph_osd_req_op ops[3];
struct ceph_osd_request *req; struct ceph_osd_request *req;
...@@ -447,6 +451,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, ...@@ -447,6 +451,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
calc_layout(osdc, vino, layout, off, plen, req, ops); calc_layout(osdc, vino, layout, off, plen, req, ops);
req->r_file_layout = *layout; /* keep a copy */ req->r_file_layout = *layout; /* keep a copy */
/* in case it differs from natural alignment that calc_layout
filled in for us */
req->r_page_alignment = page_align;
ceph_osdc_build_request(req, off, plen, ops, ceph_osdc_build_request(req, off, plen, ops,
snapc, snapc,
mtime, mtime,
...@@ -1489,7 +1497,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, ...@@ -1489,7 +1497,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
struct ceph_vino vino, struct ceph_file_layout *layout, struct ceph_vino vino, struct ceph_file_layout *layout,
u64 off, u64 *plen, u64 off, u64 *plen,
u32 truncate_seq, u64 truncate_size, u32 truncate_seq, u64 truncate_size,
struct page **pages, int num_pages) struct page **pages, int num_pages, int page_align)
{ {
struct ceph_osd_request *req; struct ceph_osd_request *req;
int rc = 0; int rc = 0;
...@@ -1499,15 +1507,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, ...@@ -1499,15 +1507,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
req = ceph_osdc_new_request(osdc, layout, vino, off, plen, req = ceph_osdc_new_request(osdc, layout, vino, off, plen,
CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
NULL, 0, truncate_seq, truncate_size, NULL, NULL, 0, truncate_seq, truncate_size, NULL,
false, 1); false, 1, page_align);
if (!req) if (!req)
return -ENOMEM; return -ENOMEM;
/* it may be a short read due to an object boundary */ /* it may be a short read due to an object boundary */
req->r_pages = pages; req->r_pages = pages;
dout("readpages final extent is %llu~%llu (%d pages)\n", dout("readpages final extent is %llu~%llu (%d pages align %d)\n",
off, *plen, req->r_num_pages); off, *plen, req->r_num_pages, page_align);
rc = ceph_osdc_start_request(osdc, req, false); rc = ceph_osdc_start_request(osdc, req, false);
if (!rc) if (!rc)
...@@ -1533,6 +1541,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, ...@@ -1533,6 +1541,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
{ {
struct ceph_osd_request *req; struct ceph_osd_request *req;
int rc = 0; int rc = 0;
int page_align = off & ~PAGE_MASK;
BUG_ON(vino.snap != CEPH_NOSNAP); BUG_ON(vino.snap != CEPH_NOSNAP);
req = ceph_osdc_new_request(osdc, layout, vino, off, &len, req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
...@@ -1541,7 +1550,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, ...@@ -1541,7 +1550,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
CEPH_OSD_FLAG_WRITE, CEPH_OSD_FLAG_WRITE,
snapc, do_sync, snapc, do_sync,
truncate_seq, truncate_size, mtime, truncate_seq, truncate_size, mtime,
nofail, 1); nofail, 1, page_align);
if (!req) if (!req)
return -ENOMEM; return -ENOMEM;
...@@ -1638,8 +1647,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, ...@@ -1638,8 +1647,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
m = ceph_msg_get(req->r_reply); m = ceph_msg_get(req->r_reply);
if (data_len > 0) { if (data_len > 0) {
unsigned data_off = le16_to_cpu(hdr->data_off); int want = calc_pages_for(req->r_page_alignment, data_len);
int want = calc_pages_for(data_off & ~PAGE_MASK, data_len);
if (unlikely(req->r_num_pages < want)) { if (unlikely(req->r_num_pages < want)) {
pr_warning("tid %lld reply %d > expected %d pages\n", pr_warning("tid %lld reply %d > expected %d pages\n",
...@@ -1651,6 +1659,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, ...@@ -1651,6 +1659,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
} }
m->pages = req->r_pages; m->pages = req->r_pages;
m->nr_pages = req->r_num_pages; m->nr_pages = req->r_num_pages;
m->page_alignment = req->r_page_alignment;
#ifdef CONFIG_BLOCK #ifdef CONFIG_BLOCK
m->bio = req->r_bio; m->bio = req->r_bio;
#endif #endif
......
...@@ -13,8 +13,7 @@ ...@@ -13,8 +13,7 @@
* build a vector of user pages * build a vector of user pages
*/ */
struct page **ceph_get_direct_page_vector(const char __user *data, struct page **ceph_get_direct_page_vector(const char __user *data,
int num_pages, int num_pages)
loff_t off, size_t len)
{ {
struct page **pages; struct page **pages;
int rc; int rc;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment