Commit d9b9c893 authored by Linus Torvalds

Merge tag 'ceph-for-5.3-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "Lots of exciting things this time!

   - support for rbd object-map and fast-diff features (myself). This
     will speed up reads, discards and things like snap diffs on sparse
     images.

   - ceph.snap.btime vxattr to expose snapshot creation time (David
     Disseldorp). This will be used to integrate with "Restore Previous
     Versions" feature added in Windows 7 for folks who reexport ceph
     through SMB.

   - security xattrs for ceph (Zheng Yan). Only SELinux is supported for
     now due to the limitations of ->dentry_init_security().

   - support for MSG_ADDR2, FS_BTIME and FS_CHANGE_ATTR features (Jeff
     Layton). This is actually a single feature bit which was missing
     because of the filesystem pieces. With this in, the kernel client
     will finally be reported as "luminous" by "ceph features" -- it is
     still being reported as "jewel" even though all required Luminous
     features were implemented in 4.13.

   - stop NULL-terminating ceph vxattrs (Jeff Layton). The convention
     with xattrs is to not terminate and this was causing
     inconsistencies with ceph-fuse.

   - change filesystem time granularity from 1 us to 1 ns, again fixing
     an inconsistency with ceph-fuse (Luis Henriques).

  On top of this there are some additional dentry name handling and cap
  flushing fixes from Zheng. Finally, Jeff is formally taking over for
  Zheng as the filesystem maintainer."

* tag 'ceph-for-5.3-rc1' of git://github.com/ceph/ceph-client: (71 commits)
  ceph: fix end offset in truncate_inode_pages_range call
  ceph: use generic_delete_inode() for ->drop_inode
  ceph: use ceph_evict_inode to cleanup inode's resource
  ceph: initialize superblock s_time_gran to 1
  MAINTAINERS: take over for Zheng as CephFS kernel client maintainer
  rbd: setallochint only if object doesn't exist
  rbd: support for object-map and fast-diff
  rbd: call rbd_dev_mapping_set() from rbd_dev_image_probe()
  libceph: export osd_req_op_data() macro
  libceph: change ceph_osdc_call() to take page vector for response
  libceph: bump CEPH_MSG_MAX_DATA_LEN (again)
  rbd: new exclusive lock wait/wake code
  rbd: quiescing lock should wait for image requests
  rbd: lock should be quiesced on reacquire
  rbd: introduce copyup state machine
  rbd: rename rbd_obj_setup_*() to rbd_obj_init_*()
  rbd: move OSD request allocation into object request state machines
  rbd: factor out __rbd_osd_setup_discard_ops()
  rbd: factor out rbd_osd_setup_copyup()
  rbd: introduce obj_req->osd_reqs list
  ...
parents 0fe49f70 d31d07b9
......@@ -3765,7 +3765,7 @@ F: arch/powerpc/platforms/cell/
CEPH COMMON CODE (LIBCEPH)
M: Ilya Dryomov <idryomov@gmail.com>
M: "Yan, Zheng" <zyan@redhat.com>
M: Jeff Layton <jlayton@kernel.org>
M: Sage Weil <sage@redhat.com>
L: ceph-devel@vger.kernel.org
W: http://ceph.com/
......@@ -3777,7 +3777,7 @@ F: include/linux/ceph/
F: include/linux/crush/
CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
M: "Yan, Zheng" <zyan@redhat.com>
M: Jeff Layton <jlayton@kernel.org>
M: Sage Weil <sage@redhat.com>
M: Ilya Dryomov <idryomov@gmail.com>
L: ceph-devel@vger.kernel.org
......
This diff is collapsed.
......@@ -18,6 +18,7 @@
/* For format version 2, rbd image 'foo' consists of objects
* rbd_id.foo - id of image
* rbd_header.<id> - image metadata
* rbd_object_map.<id> - optional image object map
* rbd_data.<id>.0000000000000000
* rbd_data.<id>.0000000000000001
* ... - data
......@@ -25,6 +26,7 @@
*/
#define RBD_HEADER_PREFIX "rbd_header."
#define RBD_OBJECT_MAP_PREFIX "rbd_object_map."
#define RBD_ID_PREFIX "rbd_id."
#define RBD_V2_DATA_FORMAT "%s.%016llx"
......@@ -39,6 +41,14 @@ enum rbd_notify_op {
RBD_NOTIFY_OP_HEADER_UPDATE = 3,
};
#define OBJECT_NONEXISTENT 0
#define OBJECT_EXISTS 1
#define OBJECT_PENDING 2
#define OBJECT_EXISTS_CLEAN 3
#define RBD_FLAG_OBJECT_MAP_INVALID (1ULL << 0)
#define RBD_FLAG_FAST_DIFF_INVALID (1ULL << 1)
/*
* For format version 1, rbd image 'foo' consists of objects
* foo.rbd - image metadata
......
......@@ -36,3 +36,15 @@ config CEPH_FS_POSIX_ACL
groups beyond the owner/group/world scheme.
If you don't know what Access Control Lists are, say N
config CEPH_FS_SECURITY_LABEL
bool "CephFS Security Labels"
depends on CEPH_FS && SECURITY
help
Security labels support alternative access control models
implemented by security modules like SELinux. This option
enables an extended attribute handler for file security
labels in the Ceph filesystem.
If you are not using a security module that requires using
extended attributes for file security labels, say N.
......@@ -159,7 +159,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
}
int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
struct ceph_acls_info *info)
struct ceph_acl_sec_ctx *as_ctx)
{
struct posix_acl *acl, *default_acl;
size_t val_size1 = 0, val_size2 = 0;
......@@ -234,9 +234,9 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
kfree(tmp_buf);
info->acl = acl;
info->default_acl = default_acl;
info->pagelist = pagelist;
as_ctx->acl = acl;
as_ctx->default_acl = default_acl;
as_ctx->pagelist = pagelist;
return 0;
out_err:
......@@ -248,18 +248,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
return err;
}
void ceph_init_inode_acls(struct inode* inode, struct ceph_acls_info *info)
void ceph_init_inode_acls(struct inode *inode, struct ceph_acl_sec_ctx *as_ctx)
{
if (!inode)
return;
ceph_set_cached_acl(inode, ACL_TYPE_ACCESS, info->acl);
ceph_set_cached_acl(inode, ACL_TYPE_DEFAULT, info->default_acl);
}
void ceph_release_acls_info(struct ceph_acls_info *info)
{
posix_acl_release(info->acl);
posix_acl_release(info->default_acl);
if (info->pagelist)
ceph_pagelist_release(info->pagelist);
ceph_set_cached_acl(inode, ACL_TYPE_ACCESS, as_ctx->acl);
ceph_set_cached_acl(inode, ACL_TYPE_DEFAULT, as_ctx->default_acl);
}
......@@ -10,6 +10,7 @@
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/signal.h>
#include <linux/iversion.h>
#include "super.h"
#include "mds_client.h"
......@@ -1576,6 +1577,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
/* Update time before taking page lock */
file_update_time(vma->vm_file);
inode_inc_iversion_raw(inode);
do {
lock_page(page);
......
This diff is collapsed.
......@@ -52,7 +52,7 @@ static int mdsc_show(struct seq_file *s, void *p)
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct rb_node *rp;
int pathlen;
int pathlen = 0;
u64 pathbase;
char *path;
......
......@@ -825,7 +825,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct ceph_acls_info acls = {};
struct ceph_acl_sec_ctx as_ctx = {};
int err;
if (ceph_snap(dir) != CEPH_NOSNAP)
......@@ -836,7 +836,10 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
goto out;
}
err = ceph_pre_init_acls(dir, &mode, &acls);
err = ceph_pre_init_acls(dir, &mode, &as_ctx);
if (err < 0)
goto out;
err = ceph_security_init_secctx(dentry, mode, &as_ctx);
if (err < 0)
goto out;
......@@ -855,9 +858,9 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
req->r_args.mknod.rdev = cpu_to_le32(rdev);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
if (acls.pagelist) {
req->r_pagelist = acls.pagelist;
acls.pagelist = NULL;
if (as_ctx.pagelist) {
req->r_pagelist = as_ctx.pagelist;
as_ctx.pagelist = NULL;
}
err = ceph_mdsc_do_request(mdsc, dir, req);
if (!err && !req->r_reply_info.head->is_dentry)
......@@ -865,10 +868,10 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
ceph_mdsc_put_request(req);
out:
if (!err)
ceph_init_inode_acls(d_inode(dentry), &acls);
ceph_init_inode_acls(d_inode(dentry), &as_ctx);
else
d_drop(dentry);
ceph_release_acls_info(&acls);
ceph_release_acl_sec_ctx(&as_ctx);
return err;
}
......@@ -884,6 +887,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct ceph_acl_sec_ctx as_ctx = {};
int err;
if (ceph_snap(dir) != CEPH_NOSNAP)
......@@ -894,6 +898,10 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
goto out;
}
err = ceph_security_init_secctx(dentry, S_IFLNK | 0777, &as_ctx);
if (err < 0)
goto out;
dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
if (IS_ERR(req)) {
......@@ -919,6 +927,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
out:
if (err)
d_drop(dentry);
ceph_release_acl_sec_ctx(&as_ctx);
return err;
}
......@@ -927,7 +936,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct ceph_acls_info acls = {};
struct ceph_acl_sec_ctx as_ctx = {};
int err = -EROFS;
int op;
......@@ -950,7 +959,10 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
}
mode |= S_IFDIR;
err = ceph_pre_init_acls(dir, &mode, &acls);
err = ceph_pre_init_acls(dir, &mode, &as_ctx);
if (err < 0)
goto out;
err = ceph_security_init_secctx(dentry, mode, &as_ctx);
if (err < 0)
goto out;
......@@ -967,9 +979,9 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
req->r_args.mkdir.mode = cpu_to_le32(mode);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
if (acls.pagelist) {
req->r_pagelist = acls.pagelist;
acls.pagelist = NULL;
if (as_ctx.pagelist) {
req->r_pagelist = as_ctx.pagelist;
as_ctx.pagelist = NULL;
}
err = ceph_mdsc_do_request(mdsc, dir, req);
if (!err &&
......@@ -979,10 +991,10 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
ceph_mdsc_put_request(req);
out:
if (!err)
ceph_init_inode_acls(d_inode(dentry), &acls);
ceph_init_inode_acls(d_inode(dentry), &as_ctx);
else
d_drop(dentry);
ceph_release_acls_info(&acls);
ceph_release_acl_sec_ctx(&as_ctx);
return err;
}
......@@ -1433,8 +1445,7 @@ static bool __dentry_lease_is_valid(struct ceph_dentry_info *di)
return false;
}
static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags,
struct inode *dir)
static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags)
{
struct ceph_dentry_info *di;
struct ceph_mds_session *session = NULL;
......@@ -1466,7 +1477,7 @@ static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags,
spin_unlock(&dentry->d_lock);
if (session) {
ceph_mdsc_lease_send_msg(session, dir, dentry,
ceph_mdsc_lease_send_msg(session, dentry,
CEPH_MDS_LEASE_RENEW, seq);
ceph_put_mds_session(session);
}
......@@ -1512,18 +1523,26 @@ static int __dir_lease_try_check(const struct dentry *dentry)
static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
{
struct ceph_inode_info *ci = ceph_inode(dir);
struct ceph_dentry_info *di = ceph_dentry(dentry);
int valid = 0;
int valid;
int shared_gen;
spin_lock(&ci->i_ceph_lock);
if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen)
valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
shared_gen = atomic_read(&ci->i_shared_gen);
spin_unlock(&ci->i_ceph_lock);
if (valid)
if (valid) {
struct ceph_dentry_info *di;
spin_lock(&dentry->d_lock);
di = ceph_dentry(dentry);
if (dir == d_inode(dentry->d_parent) &&
di && di->lease_shared_gen == shared_gen)
__ceph_dentry_dir_lease_touch(di);
dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
dir, (unsigned)atomic_read(&ci->i_shared_gen),
dentry, (unsigned)di->lease_shared_gen, valid);
else
valid = 0;
spin_unlock(&dentry->d_lock);
}
dout("dir_lease_is_valid dir %p v%u dentry %p = %d\n",
dir, (unsigned)atomic_read(&ci->i_shared_gen), dentry, valid);
return valid;
}
......@@ -1558,7 +1577,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
ceph_snap(d_inode(dentry)) == CEPH_SNAPDIR) {
valid = 1;
} else {
valid = dentry_lease_is_valid(dentry, flags, dir);
valid = dentry_lease_is_valid(dentry, flags);
if (valid == -ECHILD)
return valid;
if (valid || dir_lease_is_valid(dir, dentry)) {
......
......@@ -368,7 +368,7 @@ static struct dentry *ceph_get_parent(struct dentry *child)
}
out:
dout("get_parent %p ino %llx.%llx err=%ld\n",
child, ceph_vinop(inode), (IS_ERR(dn) ? PTR_ERR(dn) : 0));
child, ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn));
return dn;
}
......
......@@ -10,6 +10,7 @@
#include <linux/namei.h>
#include <linux/writeback.h>
#include <linux/falloc.h>
#include <linux/iversion.h>
#include "super.h"
#include "mds_client.h"
......@@ -437,7 +438,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct dentry *dn;
struct ceph_acls_info acls = {};
struct ceph_acl_sec_ctx as_ctx = {};
int mask;
int err;
......@@ -451,25 +452,28 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
if (flags & O_CREAT) {
if (ceph_quota_is_max_files_exceeded(dir))
return -EDQUOT;
err = ceph_pre_init_acls(dir, &mode, &acls);
err = ceph_pre_init_acls(dir, &mode, &as_ctx);
if (err < 0)
return err;
err = ceph_security_init_secctx(dentry, mode, &as_ctx);
if (err < 0)
goto out_ctx;
}
/* do the open */
req = prepare_open_request(dir->i_sb, flags, mode);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto out_acl;
goto out_ctx;
}
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
if (flags & O_CREAT) {
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
if (acls.pagelist) {
req->r_pagelist = acls.pagelist;
acls.pagelist = NULL;
if (as_ctx.pagelist) {
req->r_pagelist = as_ctx.pagelist;
as_ctx.pagelist = NULL;
}
}
......@@ -507,7 +511,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
} else {
dout("atomic_open finish_open on dn %p\n", dn);
if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
ceph_init_inode_acls(d_inode(dentry), &acls);
ceph_init_inode_acls(d_inode(dentry), &as_ctx);
file->f_mode |= FMODE_CREATED;
}
err = finish_open(file, dentry, ceph_open);
......@@ -516,8 +520,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
if (!req->r_err && req->r_target_inode)
ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
ceph_mdsc_put_request(req);
out_acl:
ceph_release_acls_info(&acls);
out_ctx:
ceph_release_acl_sec_ctx(&as_ctx);
dout("atomic_open result=%d\n", err);
return err;
}
......@@ -1007,7 +1011,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
* may block.
*/
truncate_inode_pages_range(inode->i_mapping, pos,
(pos+len) | (PAGE_SIZE - 1));
PAGE_ALIGN(pos + len) - 1);
req->r_mtime = mtime;
}
......@@ -1022,7 +1026,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
req->r_callback = ceph_aio_complete_req;
req->r_inode = inode;
req->r_priv = aio_req;
list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs);
list_add_tail(&req->r_private_item, &aio_req->osd_reqs);
pos += len;
continue;
......@@ -1082,8 +1086,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
while (!list_empty(&osd_reqs)) {
req = list_first_entry(&osd_reqs,
struct ceph_osd_request,
r_unsafe_item);
list_del_init(&req->r_unsafe_item);
r_private_item);
list_del_init(&req->r_private_item);
if (ret >= 0)
ret = ceph_osdc_start_request(req->r_osdc,
req, false);
......@@ -1432,6 +1436,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err)
goto out;
inode_inc_iversion_raw(inode);
if (ci->i_inline_version != CEPH_INLINE_NONE) {
err = ceph_uninline_data(file, NULL);
if (err < 0)
......@@ -2063,6 +2069,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
do_final_copy = true;
file_update_time(dst_file);
inode_inc_iversion_raw(dst_inode);
if (endoff > size) {
int caps_flags = 0;
......
This diff is collapsed.
......@@ -150,14 +150,13 @@ static int parse_reply_info_in(void **p, void *end,
info->pool_ns_data = *p;
*p += info->pool_ns_len;
}
/* btime, change_attr */
{
struct ceph_timespec btime;
u64 change_attr;
ceph_decode_need(p, end, sizeof(btime), bad);
ceph_decode_copy(p, &btime, sizeof(btime));
ceph_decode_64_safe(p, end, change_attr, bad);
}
/* btime */
ceph_decode_need(p, end, sizeof(info->btime), bad);
ceph_decode_copy(p, &info->btime, sizeof(info->btime));
/* change attribute */
ceph_decode_64_safe(p, end, info->change_attr, bad);
/* dir pin */
if (struct_v >= 2) {
......@@ -166,6 +165,15 @@ static int parse_reply_info_in(void **p, void *end,
info->dir_pin = -ENODATA;
}
/* snapshot birth time, remains zero for v<=2 */
if (struct_v >= 3) {
ceph_decode_need(p, end, sizeof(info->snap_btime), bad);
ceph_decode_copy(p, &info->snap_btime,
sizeof(info->snap_btime));
} else {
memset(&info->snap_btime, 0, sizeof(info->snap_btime));
}
*p = end;
} else {
if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
......@@ -197,7 +205,14 @@ static int parse_reply_info_in(void **p, void *end,
}
}
if (features & CEPH_FEATURE_FS_BTIME) {
ceph_decode_need(p, end, sizeof(info->btime), bad);
ceph_decode_copy(p, &info->btime, sizeof(info->btime));
ceph_decode_64_safe(p, end, info->change_attr, bad);
}
info->dir_pin = -ENODATA;
/* info->snap_btime remains zero */
}
return 0;
bad:
......@@ -717,6 +732,7 @@ void ceph_mdsc_release_request(struct kref *kref)
ceph_pagelist_release(req->r_pagelist);
put_request_session(req);
ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
WARN_ON_ONCE(!list_empty(&req->r_wait));
kfree(req);
}
......@@ -903,7 +919,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
struct inode *dir;
rcu_read_lock();
parent = req->r_dentry->d_parent;
parent = READ_ONCE(req->r_dentry->d_parent);
dir = req->r_parent ? : d_inode_rcu(parent);
if (!dir || dir->i_sb != mdsc->fsc->sb) {
......@@ -2135,7 +2151,7 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase,
memcpy(path + pos, temp->d_name.name, temp->d_name.len);
}
spin_unlock(&temp->d_lock);
temp = temp->d_parent;
temp = READ_ONCE(temp->d_parent);
/* Are we at the root? */
if (IS_ROOT(temp))
......@@ -3727,23 +3743,11 @@ static void check_new_map(struct ceph_mds_client *mdsc,
ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
ceph_session_state_name(s->s_state));
if (i >= newmap->m_num_mds ||
memcmp(ceph_mdsmap_get_addr(oldmap, i),
ceph_mdsmap_get_addr(newmap, i),
sizeof(struct ceph_entity_addr))) {
if (s->s_state == CEPH_MDS_SESSION_OPENING) {
/* the session never opened, just close it
* out now */
get_session(s);
__unregister_session(mdsc, s);
__wake_requests(mdsc, &s->s_waiting);
ceph_put_mds_session(s);
} else if (i >= newmap->m_num_mds) {
if (i >= newmap->m_num_mds) {
/* force close session for stopped mds */
get_session(s);
__unregister_session(mdsc, s);
__wake_requests(mdsc, &s->s_waiting);
kick_requests(mdsc, i);
mutex_unlock(&mdsc->mutex);
mutex_lock(&s->s_mutex);
......@@ -3754,7 +3758,13 @@ static void check_new_map(struct ceph_mds_client *mdsc,
ceph_put_mds_session(s);
mutex_lock(&mdsc->mutex);
} else {
kick_requests(mdsc, i);
continue;
}
if (memcmp(ceph_mdsmap_get_addr(oldmap, i),
ceph_mdsmap_get_addr(newmap, i),
sizeof(struct ceph_entity_addr))) {
/* just close it */
mutex_unlock(&mdsc->mutex);
mutex_lock(&s->s_mutex);
......@@ -3762,7 +3772,6 @@ static void check_new_map(struct ceph_mds_client *mdsc,
ceph_con_close(&s->s_con);
mutex_unlock(&s->s_mutex);
s->s_state = CEPH_MDS_SESSION_RESTARTING;
}
} else if (oldstate == newstate) {
continue; /* nothing new with this mds */
}
......@@ -3931,31 +3940,33 @@ static void handle_lease(struct ceph_mds_client *mdsc,
}
void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
struct inode *inode,
struct dentry *dentry, char action,
u32 seq)
{
struct ceph_msg *msg;
struct ceph_mds_lease *lease;
int len = sizeof(*lease) + sizeof(u32);
int dnamelen = 0;
struct inode *dir;
int len = sizeof(*lease) + sizeof(u32) + NAME_MAX;
dout("lease_send_msg inode %p dentry %p %s to mds%d\n",
inode, dentry, ceph_lease_op_name(action), session->s_mds);
dnamelen = dentry->d_name.len;
len += dnamelen;
dout("lease_send_msg identry %p %s to mds%d\n",
dentry, ceph_lease_op_name(action), session->s_mds);
msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false);
if (!msg)
return;
lease = msg->front.iov_base;
lease->action = action;
lease->ino = cpu_to_le64(ceph_vino(inode).ino);
lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap);
lease->seq = cpu_to_le32(seq);
put_unaligned_le32(dnamelen, lease + 1);
memcpy((void *)(lease + 1) + 4, dentry->d_name.name, dnamelen);
spin_lock(&dentry->d_lock);
dir = d_inode(dentry->d_parent);
lease->ino = cpu_to_le64(ceph_ino(dir));
lease->first = lease->last = cpu_to_le64(ceph_snap(dir));
put_unaligned_le32(dentry->d_name.len, lease + 1);
memcpy((void *)(lease + 1) + 4,
dentry->d_name.name, dentry->d_name.len);
spin_unlock(&dentry->d_lock);
/*
* if this is a preemptive lease RELEASE, no need to
* flush request stream, since the actual request will
......@@ -4157,6 +4168,7 @@ static void wait_requests(struct ceph_mds_client *mdsc)
while ((req = __get_oldest_req(mdsc))) {
dout("wait_requests timed out on tid %llu\n",
req->r_tid);
list_del_init(&req->r_wait);
__unregister_request(mdsc, req);
}
}
......
......@@ -69,6 +69,9 @@ struct ceph_mds_reply_info_in {
u64 max_bytes;
u64 max_files;
s32 dir_pin;
struct ceph_timespec btime;
struct ceph_timespec snap_btime;
u64 change_attr;
};
struct ceph_mds_reply_dir_entry {
......@@ -504,7 +507,6 @@ extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);
extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
struct inode *inode,
struct dentry *dentry, char action,
u32 seq);
......
......@@ -107,7 +107,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
struct ceph_mdsmap *m;
const void *start = *p;
int i, j, n;
int err = -EINVAL;
int err;
u8 mdsmap_v, mdsmap_cv;
u16 mdsmap_ev;
......@@ -183,8 +183,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
inc = ceph_decode_32(p);
state = ceph_decode_32(p);
state_seq = ceph_decode_64(p);
ceph_decode_copy(p, &addr, sizeof(addr));
ceph_decode_addr(&addr);
err = ceph_decode_entity_addr(p, end, &addr);
if (err)
goto corrupt;
ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
*p += sizeof(u32);
ceph_decode_32_safe(p, end, namelen, bad);
......@@ -357,7 +358,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
nomem:
err = -ENOMEM;
goto out_err;
bad:
corrupt:
pr_err("corrupt mdsmap\n");
print_hex_dump(KERN_DEBUG, "mdsmap: ",
DUMP_PREFIX_OFFSET, 16, 1,
......@@ -365,6 +366,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
out_err:
ceph_mdsmap_destroy(m);
return ERR_PTR(err);
bad:
err = -EINVAL;
goto corrupt;
}
void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
......
......@@ -135,7 +135,7 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
return NULL;
mutex_lock(&qri->mutex);
if (qri->inode) {
if (qri->inode && ceph_is_any_caps(qri->inode)) {
/* A request has already returned the inode */
mutex_unlock(&qri->mutex);
return qri->inode;
......@@ -146,7 +146,18 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
mutex_unlock(&qri->mutex);
return NULL;
}
if (qri->inode) {
/* get caps */
int ret = __ceph_do_getattr(qri->inode, NULL,
CEPH_STAT_CAP_INODE, true);
if (ret >= 0)
in = qri->inode;
else
in = ERR_PTR(ret);
} else {
in = ceph_lookup_inode(sb, realm->ino);
}
if (IS_ERR(in)) {
pr_warn("Can't lookup inode %llx (err: %ld)\n",
realm->ino, PTR_ERR(in));
......
......@@ -3,6 +3,7 @@
#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/iversion.h>
#include "super.h"
#include "mds_client.h"
#include <linux/ceph/decode.h>
......@@ -606,6 +607,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
capsnap->mtime = inode->i_mtime;
capsnap->atime = inode->i_atime;
capsnap->ctime = inode->i_ctime;
capsnap->btime = ci->i_btime;
capsnap->change_attr = inode_peek_iversion_raw(inode);
capsnap->time_warp_seq = ci->i_time_warp_seq;
capsnap->truncate_size = ci->i_truncate_size;
capsnap->truncate_seq = ci->i_truncate_seq;
......
......@@ -840,10 +840,10 @@ static int ceph_remount(struct super_block *sb, int *flags, char *data)
static const struct super_operations ceph_super_ops = {
.alloc_inode = ceph_alloc_inode,
.destroy_inode = ceph_destroy_inode,
.free_inode = ceph_free_inode,
.write_inode = ceph_write_inode,
.drop_inode = ceph_drop_inode,
.drop_inode = generic_delete_inode,
.evict_inode = ceph_evict_inode,
.sync_fs = ceph_sync_fs,
.put_super = ceph_put_super,
.remount_fs = ceph_remount,
......@@ -978,7 +978,7 @@ static int ceph_set_super(struct super_block *s, void *data)
s->s_d_op = &ceph_dentry_ops;
s->s_export_op = &ceph_export_ops;
s->s_time_gran = 1000; /* 1000 ns == 1 us */
s->s_time_gran = 1;
ret = set_anon_super(s, NULL); /* what is that second arg for? */
if (ret != 0)
......@@ -1159,17 +1159,15 @@ static int __init init_ceph(void)
goto out;
ceph_flock_init();
ceph_xattr_init();
ret = register_filesystem(&ceph_fs_type);
if (ret)
goto out_xattr;
goto out_caches;
pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
return 0;
out_xattr:
ceph_xattr_exit();
out_caches:
destroy_caches();
out:
return ret;
......@@ -1179,7 +1177,6 @@ static void __exit exit_ceph(void)
{
dout("exit_ceph\n");
unregister_filesystem(&ceph_fs_type);
ceph_xattr_exit();
destroy_caches();
}
......
......@@ -197,7 +197,8 @@ struct ceph_cap_snap {
u64 xattr_version;
u64 size;
struct timespec64 mtime, atime, ctime;
u64 change_attr;
struct timespec64 mtime, atime, ctime, btime;
u64 time_warp_seq;
u64 truncate_size;
u32 truncate_seq;
......@@ -384,6 +385,8 @@ struct ceph_inode_info {
int i_snap_realm_counter; /* snap realm (if caps) */
struct list_head i_snap_realm_item;
struct list_head i_snap_flush_item;
struct timespec64 i_btime;
struct timespec64 i_snap_btime;
struct work_struct i_work;
unsigned long i_work_mask;
......@@ -544,7 +547,12 @@ static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
long long release_count,
long long ordered_count)
{
smp_mb__before_atomic();
/*
* Makes sure operations that setup readdir cache (update page
* cache and i_size) are strongly ordered w.r.t. the following
* atomic64_set() operations.
*/
smp_mb();
atomic64_set(&ci->i_complete_seq[0], release_count);
atomic64_set(&ci->i_complete_seq[1], ordered_count);
}
......@@ -876,9 +884,8 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
extern const struct inode_operations ceph_file_iops;
extern struct inode *ceph_alloc_inode(struct super_block *sb);
extern void ceph_destroy_inode(struct inode *inode);
extern void ceph_evict_inode(struct inode *inode);
extern void ceph_free_inode(struct inode *inode);
extern int ceph_drop_inode(struct inode *inode);
extern struct inode *ceph_get_inode(struct super_block *sb,
struct ceph_vino vino);
......@@ -921,10 +928,20 @@ ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t);
extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci);
extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
extern void __init ceph_xattr_init(void);
extern void ceph_xattr_exit(void);
extern const struct xattr_handler *ceph_xattr_handlers[];
struct ceph_acl_sec_ctx {
#ifdef CONFIG_CEPH_FS_POSIX_ACL
void *default_acl;
void *acl;
#endif
#ifdef CONFIG_CEPH_FS_SECURITY_LABEL
void *sec_ctx;
u32 sec_ctxlen;
#endif
struct ceph_pagelist *pagelist;
};
#ifdef CONFIG_SECURITY
extern bool ceph_security_xattr_deadlock(struct inode *in);
extern bool ceph_security_xattr_wanted(struct inode *in);
......@@ -939,21 +956,32 @@ static inline bool ceph_security_xattr_wanted(struct inode *in)
}
#endif
/* acl.c */
struct ceph_acls_info {
void *default_acl;
void *acl;
struct ceph_pagelist *pagelist;
};
#ifdef CONFIG_CEPH_FS_SECURITY_LABEL
extern int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
struct ceph_acl_sec_ctx *ctx);
extern void ceph_security_invalidate_secctx(struct inode *inode);
#else
static inline int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
struct ceph_acl_sec_ctx *ctx)
{
return 0;
}
static inline void ceph_security_invalidate_secctx(struct inode *inode)
{
}
#endif
void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx);
/* acl.c */
#ifdef CONFIG_CEPH_FS_POSIX_ACL
struct posix_acl *ceph_get_acl(struct inode *, int);
int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type);
int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
struct ceph_acls_info *info);
void ceph_init_inode_acls(struct inode *inode, struct ceph_acls_info *info);
void ceph_release_acls_info(struct ceph_acls_info *info);
struct ceph_acl_sec_ctx *as_ctx);
void ceph_init_inode_acls(struct inode *inode,
struct ceph_acl_sec_ctx *as_ctx);
static inline void ceph_forget_all_cached_acls(struct inode *inode)
{
......@@ -966,15 +994,12 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode)
#define ceph_set_acl NULL
static inline int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
struct ceph_acls_info *info)
struct ceph_acl_sec_ctx *as_ctx)
{
return 0;
}
static inline void ceph_init_inode_acls(struct inode *inode,
struct ceph_acls_info *info)
{
}
static inline void ceph_release_acls_info(struct ceph_acls_info *info)
struct ceph_acl_sec_ctx *as_ctx)
{
}
static inline int ceph_acl_chmod(struct dentry *dentry, struct inode *inode)
......@@ -1000,7 +1025,7 @@ extern void ceph_add_cap(struct inode *inode,
unsigned cap, unsigned seq, u64 realmino, int flags,
struct ceph_cap **new_cap);
extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
extern void __ceph_remove_caps(struct inode* inode);
extern void __ceph_remove_caps(struct ceph_inode_info *ci);
extern void ceph_put_cap(struct ceph_mds_client *mdsc,
struct ceph_cap *cap);
extern int ceph_is_any_caps(struct inode *inode);
......
This diff is collapsed.
......@@ -211,6 +211,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_MON_STATEFUL_SUB | \
CEPH_FEATURE_CRUSH_TUNABLES5 | \
CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \
CEPH_FEATURE_MSG_ADDR2 | \
CEPH_FEATURE_CEPHX_V2)
#define CEPH_FEATURES_REQUIRED_DEFAULT 0
......
......@@ -682,7 +682,7 @@ extern const char *ceph_cap_op_name(int op);
/* flags field in client cap messages (version >= 10) */
#define CEPH_CLIENT_CAPS_SYNC (1<<0)
#define CEPH_CLIENT_CAPS_NO_CAPSNAP (1<<1)
#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP (1<<2);
#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP (1<<2)
/*
* caps message, used for capability callbacks, acks, requests, etc.
......
......@@ -52,4 +52,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
char *lock_name, u8 *type, char **tag,
struct ceph_locker **lockers, u32 *num_lockers);
int ceph_cls_assert_locked(struct ceph_osd_request *req, int which,
char *lock_name, u8 type, char *cookie, char *tag);
#endif
......@@ -218,18 +218,27 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
/*
* sockaddr_storage <-> ceph_sockaddr
*/
static inline void ceph_encode_addr(struct ceph_entity_addr *a)
#define CEPH_ENTITY_ADDR_TYPE_NONE 0
#define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1)
static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a)
{
__be16 ss_family = htons(a->in_addr.ss_family);
a->in_addr.ss_family = *(__u16 *)&ss_family;
/* Banner addresses require TYPE_NONE */
a->type = CEPH_ENTITY_ADDR_TYPE_NONE;
}
static inline void ceph_decode_addr(struct ceph_entity_addr *a)
/*
 * Convert a banner address to host form: the stored ss_family is
 * reinterpreted as big-endian and converted with ntohs(), and the address
 * type is set to CEPH_ENTITY_ADDR_TYPE_LEGACY.
 */
static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a)
{
/* read the raw bytes of the family field as a __be16, no value conversion */
__be16 ss_family = *(__be16 *)&a->in_addr.ss_family;
a->in_addr.ss_family = ntohs(ss_family);
/* 512 == 0x0200, i.e. the value 2 with its bytes swapped - presumably catches a mis-converted family; TODO confirm */
WARN_ON(a->in_addr.ss_family == 512);
a->type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
}
extern int ceph_decode_entity_addr(void **p, void *end,
struct ceph_entity_addr *addr);
/*
* encoders
*/
......
......@@ -84,11 +84,13 @@ struct ceph_options {
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
/*
* Handle the largest possible rbd object in one message.
* The largest possible rbd data object is 32M.
* The largest possible rbd object map object is 64M.
*
* There is no limit on the size of cephfs objects, but it has to obey
* rsize and wsize mount options anyway.
*/
#define CEPH_MSG_MAX_DATA_LEN (32*1024*1024)
#define CEPH_MSG_MAX_DATA_LEN (64*1024*1024)
#define CEPH_AUTH_NAME_DEFAULT "guest"
......@@ -299,10 +301,6 @@ int ceph_wait_for_latest_osdmap(struct ceph_client *client,
/* pagevec.c */
extern void ceph_release_page_vector(struct page **pages, int num_pages);
extern struct page **ceph_get_direct_page_vector(const void __user *data,
int num_pages,
bool write_page);
extern void ceph_put_page_vector(struct page **pages, int num_pages,
bool dirty);
extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
......
......@@ -104,7 +104,6 @@ struct ceph_mon_client {
#endif
};
extern struct ceph_monmap *ceph_monmap_decode(void *p, void *end);
extern int ceph_monmap_contains(struct ceph_monmap *m,
struct ceph_entity_addr *addr);
......
......@@ -198,9 +198,9 @@ struct ceph_osd_request {
bool r_mempool;
struct completion r_completion; /* private to osd_client.c */
ceph_osdc_callback_t r_callback;
struct list_head r_unsafe_item;
struct inode *r_inode; /* for use by callbacks */
struct list_head r_private_item; /* ditto */
void *r_priv; /* ditto */
/* set by submitter */
......@@ -389,6 +389,14 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
/*
 * Evaluate to the address of member @fld inside member @typ of op number
 * @whch in request @oreq (i.e. &oreq->r_ops[whch].typ.fld).
 *
 * @oreq and @whch are each evaluated exactly once into local temporaries;
 * a @whch that is not below r_num_ops trips BUG_ON().
 */
#define osd_req_op_data(oreq, whch, typ, fld) \
({ \
struct ceph_osd_request *__oreq = (oreq); \
unsigned int __whch = (whch); \
BUG_ON(__whch >= __oreq->r_num_ops); \
&__oreq->r_ops[__whch].typ.fld; \
})
extern void osd_req_op_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode, u32 flags);
......@@ -497,7 +505,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
const char *class, const char *method,
unsigned int flags,
struct page *req_page, size_t req_len,
struct page *resp_page, size_t *resp_len);
struct page **resp_pages, size_t *resp_len);
extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
struct ceph_vino vino,
......
......@@ -66,4 +66,6 @@ int ceph_extent_to_file(struct ceph_file_layout *l,
struct ceph_file_extent **file_extents,
u32 *num_file_extents);
u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size);
#endif
......@@ -112,6 +112,30 @@ inode_peek_iversion_raw(const struct inode *inode)
return atomic64_read(&inode->i_version);
}
/**
 * inode_set_max_iversion_raw - update i_version if the new value is larger
 * @inode: inode to set
 * @val: new i_version to set
 *
 * Some self-managed filesystems (e.g. Ceph) will only update the i_version
 * value if the new value is larger than the one we already have.
 *
 * The raw counter is left untouched when it is already greater than @val.
 * Otherwise @val is installed with a compare-and-exchange, which is retried
 * against the freshly observed value whenever the counter changed in the
 * meantime.
 */
static inline void
inode_set_max_iversion_raw(struct inode *inode, u64 val)
{
	u64 seen = inode_peek_iversion_raw(inode);

	while (seen <= val) {
		u64 prev = atomic64_cmpxchg(&inode->i_version, seen, val);

		/* nobody raced with us: @val is now in place */
		if (likely(prev == seen))
			break;

		/* lost the race; re-evaluate against what is there now */
		seen = prev;
	}
}
/**
* inode_set_iversion - set i_version to a particular value
* @inode: inode to set
......
......@@ -5,7 +5,7 @@
obj-$(CONFIG_CEPH_LIB) += libceph.o
libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
mon_client.o \
mon_client.o decode.o \
cls_lock_client.o \
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
striper.o \
......
......@@ -6,6 +6,7 @@
#include <linux/ceph/cls_lock_client.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/libceph.h>
/**
* ceph_cls_lock - grab rados lock for object
......@@ -264,8 +265,11 @@ static int decode_locker(void **p, void *end, struct ceph_locker *locker)
return ret;
*p += sizeof(struct ceph_timespec); /* skip expiration */
ceph_decode_copy(p, &locker->info.addr, sizeof(locker->info.addr));
ceph_decode_addr(&locker->info.addr);
ret = ceph_decode_entity_addr(p, end, &locker->info.addr);
if (ret)
return ret;
len = ceph_decode_32(p);
*p += len; /* skip description */
......@@ -360,7 +364,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
dout("%s lock_name %s\n", __func__, lock_name);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "get_info",
CEPH_OSD_FLAG_READ, get_info_op_page,
get_info_op_buf_size, reply_page, &reply_len);
get_info_op_buf_size, &reply_page, &reply_len);
dout("%s: status %d\n", __func__, ret);
if (ret >= 0) {
......@@ -375,3 +379,47 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
return ret;
}
EXPORT_SYMBOL(ceph_cls_lock_info);
/**
 * ceph_cls_assert_locked - add a "lock.assert_locked" class call to a request
 * @req: OSD request to add the op to
 * @which: index of the op within @req
 * @lock_name: name of the lock
 * @type: lock type, encoded as a single byte
 * @cookie: lock cookie string
 * @tag: lock tag string
 *
 * Encodes a cls_lock_assert_op (name, type, cookie, tag) into a freshly
 * allocated single page and attaches it as the request data of op @which.
 *
 * Returns 0 on success, -E2BIG if the encoded op would not fit in one page,
 * or the error reported by osd_req_op_cls_init() or
 * ceph_alloc_page_vector().
 */
int ceph_cls_assert_locked(struct ceph_osd_request *req, int which,
			   char *lock_name, u8 type, char *cookie, char *tag)
{
	int lock_name_len = strlen(lock_name);
	int cookie_len = strlen(cookie);
	int tag_len = strlen(tag);
	struct page **page_vec;
	void *cursor, *limit;
	int encoded_len;
	int err;

	/* three length-prefixed strings, one type byte, plus the header */
	encoded_len = lock_name_len + sizeof(__le32) +
		      cookie_len + sizeof(__le32) +
		      tag_len + sizeof(__le32) +
		      sizeof(u8) + CEPH_ENCODING_START_BLK_LEN;
	if (encoded_len > PAGE_SIZE)
		return -E2BIG;

	err = osd_req_op_cls_init(req, which, "lock", "assert_locked");
	if (err)
		return err;

	page_vec = ceph_alloc_page_vector(1, GFP_NOIO);
	if (IS_ERR(page_vec))
		return PTR_ERR(page_vec);

	cursor = page_address(page_vec[0]);
	limit = cursor + encoded_len;

	/* encode cls_lock_assert_op struct */
	ceph_start_encoding(&cursor, 1, 1,
			    encoded_len - CEPH_ENCODING_START_BLK_LEN);
	ceph_encode_string(&cursor, limit, lock_name, lock_name_len);
	ceph_encode_8(&cursor, type);
	ceph_encode_string(&cursor, limit, cookie, cookie_len);
	ceph_encode_string(&cursor, limit, tag, tag_len);

	/* the size computed up front must match what was actually written */
	WARN_ON(cursor != limit);

	/*
	 * NOTE(review): the trailing "false, true" presumably mean "not from
	 * a pool" and "request owns the pages" - confirm against
	 * osd_req_op_cls_request_data_pages().
	 */
	osd_req_op_cls_request_data_pages(req, which, page_vec, encoded_len,
					  0, false, true);
	return 0;
}
EXPORT_SYMBOL(ceph_cls_assert_locked);
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/decode.h>
static int
ceph_decode_entity_addr_versioned(void **p, void *end,
struct ceph_entity_addr *addr)
{
int ret;
u8 struct_v;
u32 struct_len, addr_len;
void *struct_end;
ret = ceph_start_decoding(p, end, 1, "entity_addr_t", &struct_v,
&struct_len);
if (ret)
goto bad;
ret = -EINVAL;
struct_end = *p + struct_len;
ceph_decode_copy_safe(p, end, &addr->type, sizeof(addr->type), bad);
ceph_decode_copy_safe(p, end, &addr->nonce, sizeof(addr->nonce), bad);
ceph_decode_32_safe(p, end, addr_len, bad);
if (addr_len > sizeof(addr->in_addr))
goto bad;
memset(&addr->in_addr, 0, sizeof(addr->in_addr));
if (addr_len) {
ceph_decode_copy_safe(p, end, &addr->in_addr, addr_len, bad);
addr->in_addr.ss_family =
le16_to_cpu((__force __le16)addr->in_addr.ss_family);
}
/* Advance past anything the client doesn't yet understand */
*p = struct_end;
ret = 0;
bad:
return ret;
}
static int
ceph_decode_entity_addr_legacy(void **p, void *end,
struct ceph_entity_addr *addr)
{
int ret = -EINVAL;
/* Skip rest of type field */
ceph_decode_skip_n(p, end, 3, bad);
/*
* Clients that don't support ADDR2 always send TYPE_NONE, change it
* to TYPE_LEGACY for forward compatibility.
*/
addr->type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
ceph_decode_copy_safe(p, end, &addr->nonce, sizeof(addr->nonce), bad);
memset(&addr->in_addr, 0, sizeof(addr->in_addr));
ceph_decode_copy_safe(p, end, &addr->in_addr,
sizeof(addr->in_addr), bad);
addr->in_addr.ss_family =
be16_to_cpu((__force __be16)addr->in_addr.ss_family);
ret = 0;
bad:
return ret;
}
/*
 * Decode an entity address from *p, dispatching on the leading marker
 * byte: 1 selects the versioned decoder, 0 the legacy one.
 *
 * Returns the selected decoder's result, or -EINVAL if the marker byte
 * cannot be decoded or has any other value.
 */
int
ceph_decode_entity_addr(void **p, void *end, struct ceph_entity_addr *addr)
{
	u8 marker;

	ceph_decode_8_safe(p, end, marker, bad);

	switch (marker) {
	case 1:
		return ceph_decode_entity_addr_versioned(p, end, addr);
	case 0:
		return ceph_decode_entity_addr_legacy(p, end, addr);
	default:
		break;
	}

bad:
	return -EINVAL;
}
EXPORT_SYMBOL(ceph_decode_entity_addr);
......@@ -199,12 +199,14 @@ const char *ceph_pr_addr(const struct ceph_entity_addr *addr)
switch (ss.ss_family) {
case AF_INET:
snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr,
snprintf(s, MAX_ADDR_STR_LEN, "(%d)%pI4:%hu",
le32_to_cpu(addr->type), &in4->sin_addr,
ntohs(in4->sin_port));
break;
case AF_INET6:
snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr,
snprintf(s, MAX_ADDR_STR_LEN, "(%d)[%pI6c]:%hu",
le32_to_cpu(addr->type), &in6->sin6_addr,
ntohs(in6->sin6_port));
break;
......@@ -220,7 +222,7 @@ EXPORT_SYMBOL(ceph_pr_addr);
/*
 * Rebuild msgr->my_enc_addr: take a fresh copy of msgr->inst.addr and run
 * the encode helpers on the copy, leaving inst.addr itself unchanged.
 *
 * NOTE(review): two encode helpers are applied back to back to the same
 * copy; confirm both are intended (if each converts ss_family's byte
 * order, the second call would undo the first).
 */
static void encode_my_addr(struct ceph_messenger *msgr)
{
memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr));
ceph_encode_addr(&msgr->my_enc_addr);
ceph_encode_banner_addr(&msgr->my_enc_addr);
}
/*
......@@ -1732,12 +1734,14 @@ static int read_partial_banner(struct ceph_connection *con)
ret = read_partial(con, end, size, &con->actual_peer_addr);
if (ret <= 0)
goto out;
ceph_decode_banner_addr(&con->actual_peer_addr);
size = sizeof (con->peer_addr_for_me);
end += size;
ret = read_partial(con, end, size, &con->peer_addr_for_me);
if (ret <= 0)
goto out;
ceph_decode_banner_addr(&con->peer_addr_for_me);
out:
return ret;
......@@ -1981,6 +1985,7 @@ int ceph_parse_ips(const char *c, const char *end,
}
addr_set_port(&addr[i], port);
addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
......@@ -2011,9 +2016,6 @@ static int process_banner(struct ceph_connection *con)
if (verify_hello(con) < 0)
return -1;
ceph_decode_addr(&con->actual_peer_addr);
ceph_decode_addr(&con->peer_addr_for_me);
/*
* Make sure the other end is who we wanted. note that the other
* end may not yet know their ip address, so if it's 0.0.0.0, give
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -259,3 +259,20 @@ int ceph_extent_to_file(struct ceph_file_layout *l,
return 0;
}
EXPORT_SYMBOL(ceph_extent_to_file);
/*
 * Count the objects that @size bytes span under layout @l.
 *
 * Every (possibly partial) period of stripe_count * object_size bytes
 * contributes stripe_count objects.  If the bytes left over after the last
 * full period cover less than one full stripe (stripe_count * stripe_unit),
 * only DIV_ROUND_UP(leftover, stripe_unit) of that period's objects are
 * reached, so the unreached ones are subtracted again.
 */
u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size)
{
	u64 stripe_bytes = (u64)l->stripe_count * l->stripe_unit;
	u64 period = (u64)l->stripe_count * l->object_size;
	u64 unreached_objs = 0;
	u64 tail_bytes;
	u64 periods;

	periods = DIV64_U64_ROUND_UP(size, period);
	div64_u64_rem(size, period, &tail_bytes);

	if (tail_bytes > 0 && tail_bytes < stripe_bytes) {
		unreached_objs = l->stripe_count -
				 DIV_ROUND_UP_ULL(tail_bytes, l->stripe_unit);
	}

	return periods * l->stripe_count - unreached_objs;
}
EXPORT_SYMBOL(ceph_get_num_objects);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment