Commit 31990f0f authored by Linus Torvalds

Merge tag 'ceph-for-4.20-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The highlights are:

   - a series that fixes some old memory allocation issues in libceph
     (myself). We no longer allocate memory in places where allocation
     failures cannot be handled and BUG when the allocation fails.

   - support for copy_file_range() syscall (Luis Henriques). If size and
     alignment conditions are met, it leverages RADOS copy-from
     operation. Otherwise, a local copy is performed.

   - a patch that reduces memory requirement of ceph_sync_read() from
     the size of the entire read to the size of one object (Zheng Yan).

   - fallocate() syscall is now restricted to FALLOC_FL_PUNCH_HOLE (Luis
     Henriques)"

* tag 'ceph-for-4.20-rc1' of git://github.com/ceph/ceph-client: (25 commits)
  ceph: new mount option to disable usage of copy-from op
  ceph: support copy_file_range file operation
  libceph: support the RADOS copy-from operation
  ceph: add non-blocking parameter to ceph_try_get_caps()
  libceph: check reply num_data_items in setup_request_data()
  libceph: preallocate message data items
  libceph, rbd, ceph: move ceph_osdc_alloc_messages() calls
  libceph: introduce alloc_watch_request()
  libceph: assign cookies in linger_submit()
  libceph: enable fallback to ceph_msg_new() in ceph_msgpool_get()
  ceph: num_ops is off by one in ceph_aio_retry_work()
  libceph: no need to call osd_req_opcode_valid() in osd_req_encode_op()
  ceph: set timeout conditionally in __cap_delay_requeue
  libceph: don't consume a ref on pagelist in ceph_msg_data_add_pagelist()
  libceph: introduce ceph_pagelist_alloc()
  libceph: osd_req_op_cls_init() doesn't need to take opcode
  libceph: bump CEPH_MSG_MAX_DATA_LEN
  ceph: only allow punch hole mode in fallocate
  ceph: refactor ceph_sync_read()
  ceph: check if LOOKUPNAME request was aborted when filling trace
  ...
parents a9ac6cc4 ea4cdc54
@@ -151,6 +151,11 @@ Mount Options
   Report overall filesystem usage in statfs instead of using the root
   directory quota.
 
+  nocopyfrom
+    Don't use the RADOS 'copy-from' operation to perform remote object
+    copies. Currently, it's only used in copy_file_range, which will revert
+    to the default VFS implementation if this option is used.
+
 More Information
 ================
......
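
The nocopyfrom option documented above can be passed like any other ceph mount option. A hedged sketch using mount(2) follows; the monitor address, entity name, and key are placeholders, not values from this commit:

#include <sys/mount.h>

/* sketch: mount CephFS with copy-from offload disabled, forcing
 * copy_file_range() to use the default VFS implementation */
static int mount_cephfs_nocopyfrom(void)
{
        return mount("192.168.0.1:6789:/", "/mnt/cephfs", "ceph", 0,
                     "name=admin,secret=<base64 key>,nocopyfrom");
}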
@@ -1500,9 +1500,6 @@ rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops)
                        rbd_dev->header.object_prefix, obj_req->ex.oe_objno))
                goto err_req;
 
-       if (ceph_osdc_alloc_messages(req, GFP_NOIO))
-               goto err_req;
-
        return req;
 
 err_req:
@@ -1945,6 +1942,10 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
                }
                if (ret)
                        return ret;
+
+               ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
+               if (ret)
+                       return ret;
        }
 
        return 0;
@@ -2374,8 +2375,7 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
        if (!obj_req->osd_req)
                return -ENOMEM;
 
-       ret = osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
-                                 "copyup");
+       ret = osd_req_op_cls_init(obj_req->osd_req, 0, "rbd", "copyup");
        if (ret)
                return ret;
 
@@ -2405,6 +2405,10 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
                rbd_assert(0);
        }
 
+       ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
+       if (ret)
+               return ret;
+
        rbd_obj_request_submit(obj_req);
        return 0;
 }
@@ -3784,10 +3788,6 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
        ceph_oloc_copy(&req->r_base_oloc, oloc);
        req->r_flags = CEPH_OSD_FLAG_READ;
 
-       ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
-       if (ret)
-               goto out_req;
-
        pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
        if (IS_ERR(pages)) {
                ret = PTR_ERR(pages);
@@ -3798,6 +3798,10 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
        osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false,
                                         true);
 
+       ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
+       if (ret)
+               goto out_req;
+
        ceph_osdc_start_request(osdc, req, false);
        ret = ceph_osdc_wait_request(osdc, req);
        if (ret >= 0)
@@ -6067,7 +6071,7 @@ static ssize_t rbd_remove_single_major(struct bus_type *bus,
  * create control files in sysfs
  * /sys/bus/rbd/...
  */
-static int rbd_sysfs_init(void)
+static int __init rbd_sysfs_init(void)
 {
        int ret;
 
@@ -6082,13 +6086,13 @@ static int rbd_sysfs_init(void)
        return ret;
 }
 
-static void rbd_sysfs_cleanup(void)
+static void __exit rbd_sysfs_cleanup(void)
 {
        bus_unregister(&rbd_bus_type);
        device_unregister(&rbd_root_dev);
 }
 
-static int rbd_slab_init(void)
+static int __init rbd_slab_init(void)
 {
        rbd_assert(!rbd_img_request_cache);
        rbd_img_request_cache = KMEM_CACHE(rbd_img_request, 0);
......
@@ -104,6 +104,11 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
        struct timespec64 old_ctime = inode->i_ctime;
        umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
 
+       if (ceph_snap(inode) != CEPH_NOSNAP) {
+               ret = -EROFS;
+               goto out;
+       }
+
        switch (type) {
        case ACL_TYPE_ACCESS:
                name = XATTR_NAME_POSIX_ACL_ACCESS;
@@ -138,11 +143,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
                goto out_free;
        }
 
-       if (ceph_snap(inode) != CEPH_NOSNAP) {
-               ret = -EROFS;
-               goto out_free;
-       }
-
        if (new_mode != old_mode) {
                newattrs.ia_ctime = current_time(inode);
                newattrs.ia_mode = new_mode;
@@ -206,10 +206,9 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
        tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL);
        if (!tmp_buf)
                goto out_err;
-       pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_KERNEL);
+       pagelist = ceph_pagelist_alloc(GFP_KERNEL);
        if (!pagelist)
                goto out_err;
-       ceph_pagelist_init(pagelist);
 
        err = ceph_pagelist_reserve(pagelist, PAGE_SIZE);
        if (err)
......
@@ -322,7 +322,7 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
                /* caller of readpages does not hold buffer and read caps
                 * (fadvise, madvise and readahead cases) */
                int want = CEPH_CAP_FILE_CACHE;
-               ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, &got);
+               ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, true, &got);
                if (ret < 0) {
                        dout("start_read %p, error getting cap\n", inode);
                } else if (!(got & want)) {
......
@@ -519,9 +519,9 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
  *    -> we take mdsc->cap_delay_lock
  */
 static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
-                               struct ceph_inode_info *ci)
+                               struct ceph_inode_info *ci,
+                               bool set_timeout)
 {
-       __cap_set_timeouts(mdsc, ci);
        dout("__cap_delay_requeue %p flags %d at %lu\n", &ci->vfs_inode,
             ci->i_ceph_flags, ci->i_hold_caps_max);
        if (!mdsc->stopping) {
@@ -531,6 +531,8 @@ static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
                                goto no_change;
                        list_del_init(&ci->i_cap_delay_list);
                }
+               if (set_timeout)
+                       __cap_set_timeouts(mdsc, ci);
                list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
 no_change:
                spin_unlock(&mdsc->cap_delay_lock);
@@ -720,7 +722,7 @@ void ceph_add_cap(struct inode *inode,
                dout(" issued %s, mds wanted %s, actual %s, queueing\n",
                     ceph_cap_string(issued), ceph_cap_string(wanted),
                     ceph_cap_string(actual_wanted));
-               __cap_delay_requeue(mdsc, ci);
+               __cap_delay_requeue(mdsc, ci, true);
        }
 
        if (flags & CEPH_CAP_FLAG_AUTH) {
@@ -1647,7 +1649,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
        if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
            (mask & CEPH_CAP_FILE_BUFFER))
                dirty |= I_DIRTY_DATASYNC;
-       __cap_delay_requeue(mdsc, ci);
+       __cap_delay_requeue(mdsc, ci, true);
        return dirty;
 }
 
@@ -2065,7 +2067,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 
        /* Reschedule delayed caps release if we delayed anything */
        if (delayed)
-               __cap_delay_requeue(mdsc, ci);
+               __cap_delay_requeue(mdsc, ci, false);
 
        spin_unlock(&ci->i_ceph_lock);
 
@@ -2125,7 +2127,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
                if (delayed) {
                        spin_lock(&ci->i_ceph_lock);
-                       __cap_delay_requeue(mdsc, ci);
+                       __cap_delay_requeue(mdsc, ci, true);
                        spin_unlock(&ci->i_ceph_lock);
                }
        } else {
@@ -2671,17 +2673,18 @@ static void check_max_size(struct inode *inode, loff_t endoff)
                ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 }
 
-int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, int *got)
+int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
+                     bool nonblock, int *got)
 {
        int ret, err = 0;
 
        BUG_ON(need & ~CEPH_CAP_FILE_RD);
-       BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO));
+       BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
        ret = ceph_pool_perm_check(ci, need);
        if (ret < 0)
                return ret;
 
-       ret = try_get_cap_refs(ci, need, want, 0, true, got, &err);
+       ret = try_get_cap_refs(ci, need, want, 0, nonblock, got, &err);
        if (ret) {
                if (err == -EAGAIN) {
                        ret = 0;
......
This diff is collapsed.
@@ -1132,8 +1132,12 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
        if (IS_ERR(realdn)) {
                pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
                       PTR_ERR(realdn), dn, in, ceph_vinop(in));
-               dput(dn);
-               dn = realdn; /* note realdn contains the error */
+               dn = realdn;
+               /*
+                * Caller should release 'dn' in the case of error.
+                * If 'req->r_dentry' is passed to this function,
+                * caller should leave 'req->r_dentry' untouched.
+                */
                goto out;
        } else if (realdn) {
                dout("dn %p (%d) spliced with %p (%d) "
@@ -1196,7 +1200,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
                WARN_ON_ONCE(1);
        }
 
-       if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) {
+       if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
+           test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
+           !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
                struct qstr dname;
                struct dentry *dn, *parent;
 
@@ -1677,7 +1683,6 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                if (IS_ERR(realdn)) {
                        err = PTR_ERR(realdn);
                        d_drop(dn);
-                       dn = NULL;
                        goto next_item;
                }
                dn = realdn;
......
@@ -2071,7 +2071,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
        if (req->r_old_dentry_drop)
                len += req->r_old_dentry->d_name.len;
 
-       msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS, false);
+       msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false);
        if (!msg) {
                msg = ERR_PTR(-ENOMEM);
                goto out_free2;
@@ -2136,7 +2136,6 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
        if (req->r_pagelist) {
                struct ceph_pagelist *pagelist = req->r_pagelist;
-               refcount_inc(&pagelist->refcnt);
                ceph_msg_data_add_pagelist(msg, pagelist);
                msg->hdr.data_len = cpu_to_le32(pagelist->length);
        } else {
@@ -3126,12 +3125,11 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
        pr_info("mds%d reconnect start\n", mds);
 
-       pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+       pagelist = ceph_pagelist_alloc(GFP_NOFS);
        if (!pagelist)
                goto fail_nopagelist;
-       ceph_pagelist_init(pagelist);
 
-       reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false);
+       reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false);
        if (!reply)
                goto fail_nomsg;
 
@@ -3241,6 +3239,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
        mutex_unlock(&mdsc->mutex);
        up_read(&mdsc->snap_rwsem);
+       ceph_pagelist_release(pagelist);
        return;
 
 fail:
......
@@ -165,6 +165,8 @@ enum {
        Opt_noacl,
        Opt_quotadf,
        Opt_noquotadf,
+       Opt_copyfrom,
+       Opt_nocopyfrom,
 };
 
 static match_table_t fsopt_tokens = {
@@ -203,6 +205,8 @@ static match_table_t fsopt_tokens = {
        {Opt_noacl, "noacl"},
        {Opt_quotadf, "quotadf"},
        {Opt_noquotadf, "noquotadf"},
+       {Opt_copyfrom, "copyfrom"},
+       {Opt_nocopyfrom, "nocopyfrom"},
        {-1, NULL}
 };
 
@@ -355,6 +359,12 @@ static int parse_fsopt_token(char *c, void *private)
        case Opt_noquotadf:
                fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF;
                break;
+       case Opt_copyfrom:
+               fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM;
+               break;
+       case Opt_nocopyfrom:
+               fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM;
+               break;
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
        case Opt_acl:
                fsopt->sb_flags |= SB_POSIXACL;
@@ -553,6 +563,9 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
                seq_puts(m, ",noacl");
 #endif
 
+       if (fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM)
+               seq_puts(m, ",nocopyfrom");
+
        if (fsopt->mds_namespace)
                seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
        if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
......
@@ -40,6 +40,7 @@
 #define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
 #define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
 #define CEPH_MOUNT_OPT_NOQUOTADF       (1<<13) /* no root dir quota in statfs */
+#define CEPH_MOUNT_OPT_NOCOPYFROM      (1<<14) /* don't use RADOS 'copy-from' op */
 
 #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
 
@@ -1008,7 +1009,7 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
 extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
                         loff_t endoff, int *got, struct page **pinned_page);
 extern int ceph_try_get_caps(struct ceph_inode_info *ci,
-                            int need, int want, int *got);
+                            int need, int want, bool nonblock, int *got);
 
 /* for counting open files by mode */
 extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode);
......
@@ -951,11 +951,10 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
 
        if (size > 0) {
                /* copy value into pagelist */
-               pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+               pagelist = ceph_pagelist_alloc(GFP_NOFS);
                if (!pagelist)
                        return -ENOMEM;
 
-               ceph_pagelist_init(pagelist);
                err = ceph_pagelist_append(pagelist, value, size);
                if (err)
                        goto out;
......
@@ -81,7 +81,13 @@ struct ceph_options {
 #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
 #define CEPH_MSG_MAX_MIDDLE_LEN        (16*1024*1024)
-#define CEPH_MSG_MAX_DATA_LEN  (16*1024*1024)
+
+/*
+ * Handle the largest possible rbd object in one message.
+ * There is no limit on the size of cephfs objects, but it has to obey
+ * rsize and wsize mount options anyway.
+ */
+#define CEPH_MSG_MAX_DATA_LEN  (32*1024*1024)
 
 #define CEPH_AUTH_NAME_DEFAULT   "guest"
......
@@ -82,22 +82,6 @@ enum ceph_msg_data_type {
        CEPH_MSG_DATA_BVECS,    /* data source/destination is a bio_vec array */
 };
 
-static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
-{
-       switch (type) {
-       case CEPH_MSG_DATA_NONE:
-       case CEPH_MSG_DATA_PAGES:
-       case CEPH_MSG_DATA_PAGELIST:
-#ifdef CONFIG_BLOCK
-       case CEPH_MSG_DATA_BIO:
-#endif /* CONFIG_BLOCK */
-       case CEPH_MSG_DATA_BVECS:
-               return true;
-       default:
-               return false;
-       }
-}
-
 #ifdef CONFIG_BLOCK
 struct ceph_bio_iter {
@@ -181,7 +165,6 @@ struct ceph_bvec_iter {
        } while (0)
 
 struct ceph_msg_data {
-       struct list_head        links;  /* ceph_msg->data */
        enum ceph_msg_data_type type;
        union {
 #ifdef CONFIG_BLOCK
@@ -202,7 +185,6 @@ struct ceph_msg_data {
 
 struct ceph_msg_data_cursor {
        size_t                  total_resid;    /* across all data items */
-       struct list_head        *data_head;     /* = &ceph_msg->data */
 
        struct ceph_msg_data    *data;          /* current data item */
        size_t                  resid;          /* bytes not yet consumed */
@@ -240,7 +222,9 @@ struct ceph_msg {
        struct ceph_buffer *middle;
 
        size_t                          data_length;
-       struct list_head                data;
+       struct ceph_msg_data            *data;
+       int                             num_data_items;
+       int                             max_data_items;
        struct ceph_msg_data_cursor     cursor;
 
        struct ceph_connection *con;
@@ -381,6 +365,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
 void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
                             struct ceph_bvec_iter *bvec_pos);
 
+struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
+                              gfp_t flags, bool can_fail);
 extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
                                     bool can_fail);
......
@@ -13,14 +13,15 @@ struct ceph_msgpool {
        mempool_t *pool;
        int type;                       /* preallocated message type */
        int front_len;                  /* preallocated payload size */
+       int max_data_items;
 };
 
-extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
-                            int front_len, int size, bool blocking,
-                            const char *name);
+int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
+                     int front_len, int max_data_items, int size,
+                     const char *name);
 extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
-extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *,
-                                        int front_len);
+struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len,
+                                 int max_data_items);
 extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *);
 
 #endif
@@ -136,6 +136,13 @@ struct ceph_osd_req_op {
                        u64 expected_object_size;
                        u64 expected_write_size;
                } alloc_hint;
+               struct {
+                       u64 snapid;
+                       u64 src_version;
+                       u8 flags;
+                       u32 src_fadvise_flags;
+                       struct ceph_osd_data osd_data;
+               } copy_from;
        };
 };
 
@@ -444,9 +451,8 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
                                        struct page **pages, u64 length,
                                        u32 alignment, bool pages_from_pool,
                                        bool own_pages);
-extern int osd_req_op_cls_init(struct ceph_osd_request *osd_req,
-                              unsigned int which, u16 opcode,
-                              const char *class, const char *method);
+int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
+                       const char *class, const char *method);
 extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
                                 u16 opcode, const char *name, const void *value,
                                 size_t size, u8 cmp_op, u8 cmp_mode);
@@ -511,6 +517,16 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
                                struct timespec64 *mtime,
                                struct page **pages, int nr_pages);
 
+int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
+                       u64 src_snapid, u64 src_version,
+                       struct ceph_object_id *src_oid,
+                       struct ceph_object_locator *src_oloc,
+                       u32 src_fadvise_flags,
+                       struct ceph_object_id *dst_oid,
+                       struct ceph_object_locator *dst_oloc,
+                       u32 dst_fadvise_flags,
+                       u8 copy_from_flags);
+
 /* watch/notify */
 struct ceph_osd_linger_request *
 ceph_osdc_watch(struct ceph_osd_client *osdc,
......
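
The ceph_osdc_copy_from() declaration above is the libceph entry point that fs/ceph builds its copy_file_range() support on. Below is a hedged sketch of a call site, derived only from this signature; the oid/oloc variables, snapid, and the fallback label are hypothetical, not the actual fs/ceph/file.c code:

/* sketch: copy one object from src to dst, ordered against writes */
ret = ceph_osdc_copy_from(osdc,
                          src_snapid, 0 /* src_version */,
                          &src_oid, &src_oloc,
                          CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL,
                          &dst_oid, &dst_oloc,
                          CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
                          CEPH_OSD_COPY_FROM_FLAG_RWORDERED);
if (ret)
        goto fallback_to_default_copy;  /* e.g. revert to the VFS copy */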
@@ -23,16 +23,7 @@ struct ceph_pagelist_cursor {
        size_t room;            /* room remaining to reset to */
 };
 
-static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
-{
-       INIT_LIST_HEAD(&pl->head);
-       pl->mapped_tail = NULL;
-       pl->length = 0;
-       pl->room = 0;
-       INIT_LIST_HEAD(&pl->free_list);
-       pl->num_pages_free = 0;
-       refcount_set(&pl->refcnt, 1);
-}
+struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags);
 
 extern void ceph_pagelist_release(struct ceph_pagelist *pl);
......
@@ -410,6 +410,14 @@ enum {
 enum {
        CEPH_OSD_OP_FLAG_EXCL = 1,      /* EXCL object create */
        CEPH_OSD_OP_FLAG_FAILOK = 2,    /* continue despite failure */
+       CEPH_OSD_OP_FLAG_FADVISE_RANDOM     = 0x4, /* the op is random */
+       CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL = 0x8, /* the op is sequential */
+       CEPH_OSD_OP_FLAG_FADVISE_WILLNEED   = 0x10,/* data will be accessed in
+                                                     the near future */
+       CEPH_OSD_OP_FLAG_FADVISE_DONTNEED   = 0x20,/* data will not be accessed
+                                                     in the near future */
+       CEPH_OSD_OP_FLAG_FADVISE_NOCACHE    = 0x40,/* data will be accessed only
+                                                     once by this client */
 };
 
 #define EOLDSNAPC    ERESTART  /* ORDERSNAP flag set; writer has old snapc*/
@@ -431,6 +439,15 @@ enum {
        CEPH_OSD_CMPXATTR_MODE_U64    = 2
 };
 
+enum {
+       CEPH_OSD_COPY_FROM_FLAG_FLUSH = 1,          /* part of a flush operation */
+       CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY = 2, /* ignore pool overlay */
+       CEPH_OSD_COPY_FROM_FLAG_IGNORE_CACHE = 4,   /* ignore osd cache logic */
+       CEPH_OSD_COPY_FROM_FLAG_MAP_SNAP_CLONE = 8, /* map snap direct to
+                                                    * cloneid */
+       CEPH_OSD_COPY_FROM_FLAG_RWORDERED = 16,     /* order with write */
+};
+
 enum {
        CEPH_OSD_WATCH_OP_UNWATCH = 0,
        CEPH_OSD_WATCH_OP_LEGACY_WATCH = 1,
@@ -497,6 +514,17 @@ struct ceph_osd_op {
                        __le64 expected_object_size;
                        __le64 expected_write_size;
                } __attribute__ ((packed)) alloc_hint;
+               struct {
+                       __le64 snapid;
+                       __le64 src_version;
+                       __u8 flags; /* CEPH_OSD_COPY_FROM_FLAG_* */
+                       /*
+                        * CEPH_OSD_OP_FLAG_FADVISE_*: fadvise flags
+                        * for src object, flags for dest object are in
+                        * ceph_osd_op::flags.
+                        */
+                       __le32 src_fadvise_flags;
+               } __attribute__ ((packed)) copy_from;
        };
        __le32 payload_len;
 } __attribute__ ((packed));
......
@@ -156,7 +156,6 @@ static bool con_flag_test_and_set(struct ceph_connection *con,
 /* Slab caches for frequently-allocated structures */
 
 static struct kmem_cache       *ceph_msg_cache;
-static struct kmem_cache       *ceph_msg_data_cache;
 
 /* static tag bytes (protocol control messages) */
 static char tag_msg = CEPH_MSGR_TAG_MSG;
@@ -235,23 +234,11 @@ static int ceph_msgr_slab_init(void)
        if (!ceph_msg_cache)
                return -ENOMEM;
 
-       BUG_ON(ceph_msg_data_cache);
-       ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0);
-       if (ceph_msg_data_cache)
-               return 0;
-
-       kmem_cache_destroy(ceph_msg_cache);
-       ceph_msg_cache = NULL;
-
-       return -ENOMEM;
+       return 0;
 }
 
 static void ceph_msgr_slab_exit(void)
 {
-       BUG_ON(!ceph_msg_data_cache);
-       kmem_cache_destroy(ceph_msg_data_cache);
-       ceph_msg_data_cache = NULL;
-
        BUG_ON(!ceph_msg_cache);
        kmem_cache_destroy(ceph_msg_cache);
        ceph_msg_cache = NULL;
@@ -1141,16 +1128,13 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
 static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
 {
        struct ceph_msg_data_cursor *cursor = &msg->cursor;
-       struct ceph_msg_data *data;
 
        BUG_ON(!length);
        BUG_ON(length > msg->data_length);
-       BUG_ON(list_empty(&msg->data));
+       BUG_ON(!msg->num_data_items);
 
-       cursor->data_head = &msg->data;
        cursor->total_resid = length;
-       data = list_first_entry(&msg->data, struct ceph_msg_data, links);
-       cursor->data = data;
+       cursor->data = msg->data;
 
        __ceph_msg_data_cursor_init(cursor);
 }
@@ -1231,8 +1215,7 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
 
        if (!cursor->resid && cursor->total_resid) {
                WARN_ON(!cursor->last_piece);
-               BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
-               cursor->data = list_next_entry(cursor->data, links);
+               cursor->data++;
                __ceph_msg_data_cursor_init(cursor);
                new_piece = true;
        }
@@ -1248,9 +1231,6 @@ static size_t sizeof_footer(struct ceph_connection *con)
 
 static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
 {
-       BUG_ON(!msg);
-       BUG_ON(!data_len);
-
        /* Initialize data cursor */
        ceph_msg_data_cursor_init(msg, (size_t)data_len);
 }
@@ -1590,7 +1570,7 @@ static int write_partial_message_data(struct ceph_connection *con)
 
        dout("%s %p msg %p\n", __func__, con, msg);
 
-       if (list_empty(&msg->data))
+       if (!msg->num_data_items)
                return -EINVAL;
 
        /*
@@ -2347,8 +2327,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
        u32 crc = 0;
        int ret;
 
-       BUG_ON(!msg);
-       if (list_empty(&msg->data))
+       if (!msg->num_data_items)
                return -EIO;
 
        if (do_datacrc)
@@ -3256,32 +3235,16 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con,
        return false;
 }
 
-static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
+static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg)
 {
-       struct ceph_msg_data *data;
-
-       if (WARN_ON(!ceph_msg_data_type_valid(type)))
-               return NULL;
-
-       data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
-       if (!data)
-               return NULL;
-
-       data->type = type;
-       INIT_LIST_HEAD(&data->links);
-
-       return data;
+       BUG_ON(msg->num_data_items >= msg->max_data_items);
+       return &msg->data[msg->num_data_items++];
 }
 
 static void ceph_msg_data_destroy(struct ceph_msg_data *data)
 {
-       if (!data)
-               return;
-
-       WARN_ON(!list_empty(&data->links));
        if (data->type == CEPH_MSG_DATA_PAGELIST)
                ceph_pagelist_release(data->pagelist);
-       kmem_cache_free(ceph_msg_data_cache, data);
 }
 
 void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
@@ -3292,13 +3255,12 @@ void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
        BUG_ON(!pages);
        BUG_ON(!length);
 
-       data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
-       BUG_ON(!data);
+       data = ceph_msg_data_add(msg);
+       data->type = CEPH_MSG_DATA_PAGES;
        data->pages = pages;
        data->length = length;
        data->alignment = alignment & ~PAGE_MASK;
 
-       list_add_tail(&data->links, &msg->data);
        msg->data_length += length;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_pages);
@@ -3311,11 +3273,11 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
        BUG_ON(!pagelist);
        BUG_ON(!pagelist->length);
 
-       data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
-       BUG_ON(!data);
+       data = ceph_msg_data_add(msg);
+       data->type = CEPH_MSG_DATA_PAGELIST;
+       refcount_inc(&pagelist->refcnt);
        data->pagelist = pagelist;
 
-       list_add_tail(&data->links, &msg->data);
        msg->data_length += pagelist->length;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
@@ -3326,12 +3288,11 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
 {
        struct ceph_msg_data *data;
 
-       data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
-       BUG_ON(!data);
+       data = ceph_msg_data_add(msg);
+       data->type = CEPH_MSG_DATA_BIO;
        data->bio_pos = *bio_pos;
        data->bio_length = length;
 
-       list_add_tail(&data->links, &msg->data);
        msg->data_length += length;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_bio);
@@ -3342,11 +3303,10 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
 {
        struct ceph_msg_data *data;
 
-       data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS);
-       BUG_ON(!data);
+       data = ceph_msg_data_add(msg);
+       data->type = CEPH_MSG_DATA_BVECS;
        data->bvec_pos = *bvec_pos;
 
-       list_add_tail(&data->links, &msg->data);
        msg->data_length += bvec_pos->iter.bi_size;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
@@ -3355,8 +3315,8 @@ EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
  * construct a new message with given type, size
  * the new msg has a ref count of 1.
  */
-struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
-                             bool can_fail)
+struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
+                              gfp_t flags, bool can_fail)
 {
        struct ceph_msg *m;
 
@@ -3370,7 +3330,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 
        INIT_LIST_HEAD(&m->list_head);
        kref_init(&m->kref);
-       INIT_LIST_HEAD(&m->data);
 
        /* front */
        if (front_len) {
@@ -3385,6 +3344,15 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
        }
        m->front_alloc_len = m->front.iov_len = front_len;
 
+       if (max_data_items) {
+               m->data = kmalloc_array(max_data_items, sizeof(*m->data),
+                                       flags);
+               if (!m->data)
+                       goto out2;
+
+               m->max_data_items = max_data_items;
+       }
+
        dout("ceph_msg_new %p front %d\n", m, front_len);
        return m;
 
@@ -3401,6 +3369,13 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
        }
        return NULL;
 }
+EXPORT_SYMBOL(ceph_msg_new2);
+
+struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
+                             bool can_fail)
+{
+       return ceph_msg_new2(type, front_len, 0, flags, can_fail);
+}
 EXPORT_SYMBOL(ceph_msg_new);
 
 /*
@@ -3496,13 +3471,14 @@ static void ceph_msg_free(struct ceph_msg *m)
 {
        dout("%s %p\n", __func__, m);
        kvfree(m->front.iov_base);
+       kfree(m->data);
        kmem_cache_free(ceph_msg_cache, m);
 }
 
 static void ceph_msg_release(struct kref *kref)
 {
        struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
-       struct ceph_msg_data *data, *next;
+       int i;
 
        dout("%s %p\n", __func__, m);
        WARN_ON(!list_empty(&m->list_head));
@@ -3515,11 +3491,8 @@ static void ceph_msg_release(struct kref *kref)
                m->middle = NULL;
        }
 
-       list_for_each_entry_safe(data, next, &m->data, links) {
-               list_del_init(&data->links);
-               ceph_msg_data_destroy(data);
-       }
-       m->data_length = 0;
+       for (i = 0; i < m->num_data_items; i++)
+               ceph_msg_data_destroy(&m->data[i]);
 
        if (m->pool)
                ceph_msgpool_put(m->pool, m);
......
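
Taken together, the messenger changes above replace per-item kmem_cache allocations and a linked list with an array sized once at message creation, and ceph_msg_data_add_pagelist() now takes its own pagelist reference instead of consuming the caller's. A hedged sketch of the resulting calling convention (message type and error handling are illustrative only):

struct ceph_pagelist *pl;
struct ceph_msg *msg;

/* room for exactly one data item is reserved up front */
msg = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false);
if (!msg)
        return -ENOMEM;

pl = ceph_pagelist_alloc(GFP_NOFS);
if (!pl) {
        ceph_msg_put(msg);
        return -ENOMEM;
}

/* ... fill pl with ceph_pagelist_append() ... */

ceph_msg_data_add_pagelist(msg, pl);   /* grabs its own ref */
ceph_pagelist_release(pl);             /* drop ours; msg holds one */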
@@ -14,7 +14,8 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
        struct ceph_msgpool *pool = arg;
        struct ceph_msg *msg;
 
-       msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
+       msg = ceph_msg_new2(pool->type, pool->front_len, pool->max_data_items,
+                           gfp_mask, true);
        if (!msg) {
                dout("msgpool_alloc %s failed\n", pool->name);
        } else {
@@ -35,11 +36,13 @@ static void msgpool_free(void *element, void *arg)
 }
 
 int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
-                     int front_len, int size, bool blocking, const char *name)
+                     int front_len, int max_data_items, int size,
+                     const char *name)
 {
        dout("msgpool %s init\n", name);
 
        pool->type = type;
        pool->front_len = front_len;
+       pool->max_data_items = max_data_items;
        pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
        if (!pool->pool)
                return -ENOMEM;
@@ -53,18 +56,21 @@ void ceph_msgpool_destroy(struct ceph_msgpool *pool)
        mempool_destroy(pool->pool);
 }
 
-struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
-                                 int front_len)
+struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len,
+                                 int max_data_items)
 {
        struct ceph_msg *msg;
 
-       if (front_len > pool->front_len) {
-               dout("msgpool_get %s need front %d, pool size is %d\n",
-                    pool->name, front_len, pool->front_len);
-               WARN_ON(1);
+       if (front_len > pool->front_len ||
+           max_data_items > pool->max_data_items) {
+               pr_warn_ratelimited("%s need %d/%d, pool %s has %d/%d\n",
+                                   __func__, front_len, max_data_items,
+                                   pool->name, pool->front_len,
+                                   pool->max_data_items);
+               WARN_ON_ONCE(1);
 
                /* try to alloc a fresh message */
-               return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
+               return ceph_msg_new2(pool->type, front_len, max_data_items,
+                                    GFP_NOFS, false);
        }
 
        msg = mempool_alloc(pool->pool, GFP_NOFS);
@@ -80,6 +86,9 @@ void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
        msg->front.iov_len = pool->front_len;
        msg->hdr.front_len = cpu_to_le32(pool->front_len);
 
+       msg->data_length = 0;
+       msg->num_data_items = 0;
+
        kref_init(&msg->kref);  /* retake single ref */
        mempool_free(msg, pool->pool);
 }
This diff is collapsed.
@@ -6,6 +6,26 @@
 #include <linux/highmem.h>
 #include <linux/ceph/pagelist.h>
 
+struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags)
+{
+       struct ceph_pagelist *pl;
+
+       pl = kmalloc(sizeof(*pl), gfp_flags);
+       if (!pl)
+               return NULL;
+
+       INIT_LIST_HEAD(&pl->head);
+       pl->mapped_tail = NULL;
+       pl->length = 0;
+       pl->room = 0;
+       INIT_LIST_HEAD(&pl->free_list);
+       pl->num_pages_free = 0;
+       refcount_set(&pl->refcnt, 1);
+
+       return pl;
+}
+EXPORT_SYMBOL(ceph_pagelist_alloc);
+
 static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
 {
        if (pl->mapped_tail) {
......