Commit dc594c39 authored by Linus Torvalds

Merge tag 'ceph-for-4.18-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The main piece is a set of libceph changes that revamps how OSD
  requests are aborted, improving CephFS ENOSPC handling and making
  "umount -f" actually work (Zheng and myself).

  The rest is mostly mount option handling cleanups from Chengguang and
  assorted fixes from Zheng, Luis and Dongsheng.

* tag 'ceph-for-4.18-rc1' of git://github.com/ceph/ceph-client: (31 commits)
  rbd: flush rbd_dev->watch_dwork after watch is unregistered
  ceph: update description of some mount options
  ceph: show ino32 if the value is different with default
  ceph: strengthen rsize/wsize/readdir_max_bytes validation
  ceph: fix alignment of rasize
  ceph: fix use-after-free in ceph_statfs()
  ceph: prevent i_version from going back
  ceph: fix wrong check for the case of updating link count
  libceph: allocate the locator string with GFP_NOFAIL
  libceph: make abort_on_full a per-osdc setting
  libceph: don't abort reads in ceph_osdc_abort_on_full()
  libceph: avoid a use-after-free during map check
  libceph: don't warn if req->r_abort_on_full is set
  libceph: use for_each_request() in ceph_osdc_abort_on_full()
  libceph: defer __complete_request() to a workqueue
  libceph: move more code into __complete_request()
  libceph: no need to call flush_workqueue() before destruction
  ceph: flush pending works before shutdown super
  ceph: abort osd requests on force umount
  libceph: introduce ceph_osdc_abort_requests()
  ...
parents e7655d2b 23edca86
......@@ -105,15 +105,13 @@ Mount Options
address its connection to the monitor originates from.
wsize=X
Specify the maximum write size in bytes. By default there is no
maximum. Ceph will normally size writes based on the file stripe
size.
Specify the maximum write size in bytes. Default: 16 MB.
rsize=X
Specify the maximum read size in bytes. Default: 64 MB.
Specify the maximum read size in bytes. Default: 16 MB.
rasize=X
Specify the maximum readahead. Default: 8 MB.
Specify the maximum readahead size in bytes. Default: 8 MB.
mount_timeout=X
Specify the timeout value for mount (in seconds), in the case
......
......@@ -2339,6 +2339,7 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
{
unsigned int num_osd_ops = obj_req->osd_req->r_num_ops;
int ret;
dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT);
......@@ -2353,6 +2354,11 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
if (!obj_req->osd_req)
return -ENOMEM;
ret = osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
"copyup");
if (ret)
return ret;
/*
* Only send non-zero copyup data to save some I/O and network
* bandwidth -- zero copyup data is equivalent to the object not
......@@ -2362,9 +2368,6 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
dout("%s obj_req %p detected zeroes\n", __func__, obj_req);
bytes = 0;
}
osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
"copyup");
osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0,
obj_req->copyup_bvecs,
obj_req->copyup_bvec_count,
......@@ -3397,7 +3400,6 @@ static void cancel_tasks_sync(struct rbd_device *rbd_dev)
{
dout("%s rbd_dev %p\n", __func__, rbd_dev);
cancel_delayed_work_sync(&rbd_dev->watch_dwork);
cancel_work_sync(&rbd_dev->acquired_lock_work);
cancel_work_sync(&rbd_dev->released_lock_work);
cancel_delayed_work_sync(&rbd_dev->lock_dwork);
......@@ -3415,6 +3417,7 @@ static void rbd_unregister_watch(struct rbd_device *rbd_dev)
rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
mutex_unlock(&rbd_dev->watch_mutex);
cancel_delayed_work_sync(&rbd_dev->watch_dwork);
ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
}
......
......@@ -1936,7 +1936,6 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
wr_req->r_mtime = ci->vfs_inode.i_mtime;
wr_req->r_abort_on_full = true;
err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
if (!err)
......
This diff is collapsed.
......@@ -1486,6 +1486,8 @@ const struct file_operations ceph_dir_fops = {
.release = ceph_release,
.unlocked_ioctl = ceph_ioctl,
.fsync = ceph_fsync,
.lock = ceph_lock,
.flock = ceph_flock,
};
const struct file_operations ceph_snapdir_fops = {
......
......@@ -895,7 +895,6 @@ static void ceph_aio_retry_work(struct work_struct *work)
req->r_callback = ceph_aio_complete_req;
req->r_inode = inode;
req->r_priv = aio_req;
req->r_abort_on_full = true;
ret = ceph_osdc_start_request(req->r_osdc, req, false);
out:
......
......@@ -739,7 +739,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_reply_inode *info = iinfo->in;
struct ceph_inode_info *ci = ceph_inode(inode);
int issued = 0, implemented, new_issued;
int issued, new_issued, info_caps;
struct timespec mtime, atime, ctime;
struct ceph_buffer *xattr_blob = NULL;
struct ceph_string *pool_ns = NULL;
......@@ -754,8 +754,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
inode, ceph_vinop(inode), le64_to_cpu(info->version),
ci->i_version);
info_caps = le32_to_cpu(info->cap.caps);
/* prealloc new cap struct */
if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP)
if (info_caps && ceph_snap(inode) == CEPH_NOSNAP)
new_cap = ceph_get_cap(mdsc, caps_reservation);
/*
......@@ -792,9 +794,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
le64_to_cpu(info->version) > (ci->i_version & ~1)))
new_version = true;
issued = __ceph_caps_issued(ci, &implemented);
issued |= implemented | __ceph_caps_dirty(ci);
new_issued = ~issued & le32_to_cpu(info->cap.caps);
__ceph_caps_issued(ci, &issued);
issued |= __ceph_caps_dirty(ci);
new_issued = ~issued & info_caps;
/* update inode */
inode->i_rdev = le32_to_cpu(info->rdev);
......@@ -826,6 +828,11 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
&ctime, &mtime, &atime);
}
if (new_version || (info_caps & CEPH_CAP_FILE_SHARED)) {
ci->i_files = le64_to_cpu(info->files);
ci->i_subdirs = le64_to_cpu(info->subdirs);
}
if (new_version ||
(new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
s64 old_pool = ci->i_layout.pool_id;
......@@ -854,6 +861,18 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
}
}
/* layout and rstat are not tracked by capability, update them if
* the inode info is from auth mds */
if (new_version || (info->cap.flags & CEPH_CAP_FLAG_AUTH)) {
if (S_ISDIR(inode->i_mode)) {
ci->i_dir_layout = iinfo->dir_layout;
ci->i_rbytes = le64_to_cpu(info->rbytes);
ci->i_rfiles = le64_to_cpu(info->rfiles);
ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
ceph_decode_timespec(&ci->i_rctime, &info->rctime);
}
}
/* xattrs */
/* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
if ((ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) &&
......@@ -870,7 +889,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
}
/* finally update i_version */
ci->i_version = le64_to_cpu(info->version);
if (le64_to_cpu(info->version) > ci->i_version)
ci->i_version = le64_to_cpu(info->version);
inode->i_mapping->a_ops = &ceph_aops;
......@@ -918,15 +938,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
case S_IFDIR:
inode->i_op = &ceph_dir_iops;
inode->i_fop = &ceph_dir_fops;
ci->i_dir_layout = iinfo->dir_layout;
ci->i_files = le64_to_cpu(info->files);
ci->i_subdirs = le64_to_cpu(info->subdirs);
ci->i_rbytes = le64_to_cpu(info->rbytes);
ci->i_rfiles = le64_to_cpu(info->rfiles);
ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
ceph_decode_timespec(&ci->i_rctime, &info->rctime);
break;
default:
pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
......@@ -934,12 +945,11 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
}
/* were we issued a capability? */
if (info->cap.caps) {
if (info_caps) {
if (ceph_snap(inode) == CEPH_NOSNAP) {
unsigned caps = le32_to_cpu(info->cap.caps);
ceph_add_cap(inode, session,
le64_to_cpu(info->cap.cap_id),
cap_fmode, caps,
cap_fmode, info_caps,
le32_to_cpu(info->cap.wanted),
le32_to_cpu(info->cap.seq),
le32_to_cpu(info->cap.mseq),
......@@ -949,7 +959,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
/* set dir completion flag? */
if (S_ISDIR(inode->i_mode) &&
ci->i_files == 0 && ci->i_subdirs == 0 &&
(caps & CEPH_CAP_FILE_SHARED) &&
(info_caps & CEPH_CAP_FILE_SHARED) &&
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
!__ceph_dir_is_complete(ci)) {
dout(" marking %p complete (empty)\n", inode);
......@@ -962,8 +972,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
wake = true;
} else {
dout(" %p got snap_caps %s\n", inode,
ceph_cap_string(le32_to_cpu(info->cap.caps)));
ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
ceph_cap_string(info_caps));
ci->i_snap_caps |= info_caps;
if (cap_fmode >= 0)
__ceph_get_fmode(ci, cap_fmode);
}
......@@ -978,8 +988,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
ci->i_inline_version = iinfo->inline_version;
if (ci->i_inline_version != CEPH_INLINE_NONE &&
(locked_page ||
(le32_to_cpu(info->cap.caps) & cache_caps)))
(locked_page || (info_caps & cache_caps)))
fill_inline = true;
}
......@@ -2178,6 +2187,7 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int mode;
int err;
if (ceph_snap(inode) == CEPH_SNAPDIR) {
......@@ -2190,7 +2200,8 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
return 0;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
mode = (mask & CEPH_STAT_RSTAT) ? USE_AUTH_MDS : USE_ANY_MDS;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode);
if (IS_ERR(req))
return PTR_ERR(req);
req->r_inode = inode;
......@@ -2261,6 +2272,14 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
stat->size = ci->i_files + ci->i_subdirs;
stat->blocks = 0;
stat->blksize = 65536;
/*
* Some applications rely on the number of st_nlink
* value on directories to be either 0 (if unlinked)
* or 2 + number of subdirectories.
*/
if (stat->nlink == 1)
/* '.' + '..' + subdirs */
stat->nlink = 1 + 1 + ci->i_subdirs;
}
}
return err;
......
......@@ -45,7 +45,7 @@ static void ceph_put_super(struct super_block *s)
static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
struct ceph_monmap *monmap = fsc->client->monc.monmap;
struct ceph_mon_client *monc = &fsc->client->monc;
struct ceph_statfs st;
u64 fsid;
int err;
......@@ -58,7 +58,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
}
dout("statfs\n");
err = ceph_monc_do_statfs(&fsc->client->monc, data_pool, &st);
err = ceph_monc_do_statfs(monc, data_pool, &st);
if (err < 0)
return err;
......@@ -94,8 +94,11 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = NAME_MAX;
/* Must convert the fsid, for consistent values across arches */
fsid = le64_to_cpu(*(__le64 *)(&monmap->fsid)) ^
le64_to_cpu(*((__le64 *)&monmap->fsid + 1));
mutex_lock(&monc->mutex);
fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^
le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1));
mutex_unlock(&monc->mutex);
buf->f_fsid.val[0] = fsid & 0xffffffff;
buf->f_fsid.val[1] = fsid >> 32;
......@@ -256,19 +259,19 @@ static int parse_fsopt_token(char *c, void *private)
break;
/* misc */
case Opt_wsize:
if (intval < PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE)
if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE)
return -EINVAL;
fsopt->wsize = ALIGN(intval, PAGE_SIZE);
break;
case Opt_rsize:
if (intval < PAGE_SIZE || intval > CEPH_MAX_READ_SIZE)
if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_READ_SIZE)
return -EINVAL;
fsopt->rsize = ALIGN(intval, PAGE_SIZE);
break;
case Opt_rasize:
if (intval < 0)
return -EINVAL;
fsopt->rasize = ALIGN(intval + PAGE_SIZE - 1, PAGE_SIZE);
fsopt->rasize = ALIGN(intval, PAGE_SIZE);
break;
case Opt_caps_wanted_delay_min:
if (intval < 1)
......@@ -286,7 +289,7 @@ static int parse_fsopt_token(char *c, void *private)
fsopt->max_readdir = intval;
break;
case Opt_readdir_max_bytes:
if (intval < PAGE_SIZE && intval != 0)
if (intval < (int)PAGE_SIZE && intval != 0)
return -EINVAL;
fsopt->max_readdir_bytes = intval;
break;
......@@ -534,6 +537,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",noasyncreaddir");
if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
seq_puts(m, ",nodcache");
if (fsopt->flags & CEPH_MOUNT_OPT_INO32)
seq_puts(m, ",ino32");
if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
seq_show_option(m, "fsc", fsopt->fscache_uniq);
}
......@@ -551,7 +556,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
if (fsopt->mds_namespace)
seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
if (fsopt->wsize)
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
seq_printf(m, ",wsize=%d", fsopt->wsize);
if (fsopt->rsize != CEPH_MAX_READ_SIZE)
seq_printf(m, ",rsize=%d", fsopt->rsize);
......@@ -616,7 +621,9 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
err = PTR_ERR(fsc->client);
goto fail;
}
fsc->client->extra_mon_dispatch = extra_mon_dispatch;
fsc->client->osdc.abort_on_full = true;
if (!fsopt->mds_namespace) {
ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
......@@ -674,6 +681,13 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
return ERR_PTR(err);
}
static void flush_fs_workqueues(struct ceph_fs_client *fsc)
{
flush_workqueue(fsc->wb_wq);
flush_workqueue(fsc->pg_inv_wq);
flush_workqueue(fsc->trunc_wq);
}
static void destroy_fs_client(struct ceph_fs_client *fsc)
{
dout("destroy_fs_client %p\n", fsc);
......@@ -793,6 +807,7 @@ static void ceph_umount_begin(struct super_block *sb)
if (!fsc)
return;
fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
ceph_mdsc_force_umount(fsc->mdsc);
return;
}
......@@ -1088,6 +1103,8 @@ static void ceph_kill_sb(struct super_block *s)
dout("kill_sb %p\n", s);
ceph_mdsc_pre_umount(fsc->mdsc);
flush_fs_workqueues(fsc);
generic_shutdown_super(s);
fsc->client->extra_mon_dispatch = NULL;
......
......@@ -50,10 +50,14 @@ struct ceph_vxattr {
size_t name_size; /* strlen(name) + 1 (for '\0') */
size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
size_t size);
bool readonly, hidden;
bool (*exists_cb)(struct ceph_inode_info *ci);
unsigned int flags;
};
/*
 * Bits for ceph_vxattr.flags:
 *
 * VXATTR_FLAG_READONLY - __ceph_setxattr() rejects writes to the
 *                        attribute with -EOPNOTSUPP.
 * VXATTR_FLAG_HIDDEN   - the attribute is left out of listxattr output
 *                        and of the vxattrs_name_size() total.
 * VXATTR_FLAG_RSTAT    - __ceph_getxattr() adds CEPH_STAT_RSTAT to the
 *                        getattr mask before reading the attribute.
 */
#define VXATTR_FLAG_READONLY (1<<0)
#define VXATTR_FLAG_HIDDEN (1<<1)
#define VXATTR_FLAG_RSTAT (1<<2)
/* layouts */
static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
......@@ -262,32 +266,31 @@ static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci,
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
#define XATTR_NAME_CEPH(_type, _name) \
#define XATTR_NAME_CEPH(_type, _name, _flags) \
{ \
.name = CEPH_XATTR_NAME(_type, _name), \
.name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
.readonly = true, \
.hidden = false, \
.exists_cb = NULL, \
.exists_cb = NULL, \
.flags = (VXATTR_FLAG_READONLY | _flags), \
}
/*
 * Table entry for a recursive-stat vxattr: same as XATTR_NAME_CEPH() but
 * with VXATTR_FLAG_RSTAT passed as the extra flag.
 */
#define XATTR_RSTAT_FIELD(_type, _name) \
XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT)
#define XATTR_LAYOUT_FIELD(_type, _name, _field) \
{ \
.name = CEPH_XATTR_NAME2(_type, _name, _field), \
.name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
.getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
.readonly = false, \
.hidden = true, \
.exists_cb = ceph_vxattrcb_layout_exists, \
.flags = VXATTR_FLAG_HIDDEN, \
}
#define XATTR_QUOTA_FIELD(_type, _name) \
{ \
.name = CEPH_XATTR_NAME(_type, _name), \
.name_size = sizeof(CEPH_XATTR_NAME(_type, _name)), \
.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
.readonly = false, \
.hidden = true, \
.exists_cb = ceph_vxattrcb_quota_exists, \
.flags = VXATTR_FLAG_HIDDEN, \
}
static struct ceph_vxattr ceph_dir_vxattrs[] = {
......@@ -295,30 +298,28 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
.name = "ceph.dir.layout",
.name_size = sizeof("ceph.dir.layout"),
.getxattr_cb = ceph_vxattrcb_layout,
.readonly = false,
.hidden = true,
.exists_cb = ceph_vxattrcb_layout_exists,
.flags = VXATTR_FLAG_HIDDEN,
},
XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
XATTR_LAYOUT_FIELD(dir, layout, object_size),
XATTR_LAYOUT_FIELD(dir, layout, pool),
XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
XATTR_NAME_CEPH(dir, entries),
XATTR_NAME_CEPH(dir, files),
XATTR_NAME_CEPH(dir, subdirs),
XATTR_NAME_CEPH(dir, rentries),
XATTR_NAME_CEPH(dir, rfiles),
XATTR_NAME_CEPH(dir, rsubdirs),
XATTR_NAME_CEPH(dir, rbytes),
XATTR_NAME_CEPH(dir, rctime),
XATTR_NAME_CEPH(dir, entries, 0),
XATTR_NAME_CEPH(dir, files, 0),
XATTR_NAME_CEPH(dir, subdirs, 0),
XATTR_RSTAT_FIELD(dir, rentries),
XATTR_RSTAT_FIELD(dir, rfiles),
XATTR_RSTAT_FIELD(dir, rsubdirs),
XATTR_RSTAT_FIELD(dir, rbytes),
XATTR_RSTAT_FIELD(dir, rctime),
{
.name = "ceph.quota",
.name_size = sizeof("ceph.quota"),
.getxattr_cb = ceph_vxattrcb_quota,
.readonly = false,
.hidden = true,
.exists_cb = ceph_vxattrcb_quota_exists,
.flags = VXATTR_FLAG_HIDDEN,
},
XATTR_QUOTA_FIELD(quota, max_bytes),
XATTR_QUOTA_FIELD(quota, max_files),
......@@ -333,9 +334,8 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
.name = "ceph.file.layout",
.name_size = sizeof("ceph.file.layout"),
.getxattr_cb = ceph_vxattrcb_layout,
.readonly = false,
.hidden = true,
.exists_cb = ceph_vxattrcb_layout_exists,
.flags = VXATTR_FLAG_HIDDEN,
},
XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
XATTR_LAYOUT_FIELD(file, layout, stripe_count),
......@@ -374,9 +374,10 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
struct ceph_vxattr *vxattr;
size_t size = 0;
for (vxattr = vxattrs; vxattr->name; vxattr++)
if (!vxattr->hidden)
for (vxattr = vxattrs; vxattr->name; vxattr++) {
if (!(vxattr->flags & VXATTR_FLAG_HIDDEN))
size += vxattr->name_size;
}
return size;
}
......@@ -809,7 +810,10 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
/* let's see if a virtual xattr was requested */
vxattr = ceph_match_vxattr(inode, name);
if (vxattr) {
err = ceph_do_getattr(inode, 0, true);
int mask = 0;
if (vxattr->flags & VXATTR_FLAG_RSTAT)
mask |= CEPH_STAT_RSTAT;
err = ceph_do_getattr(inode, mask, true);
if (err)
return err;
err = -ENODATA;
......@@ -919,7 +923,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
err = namelen;
if (vxattrs) {
for (i = 0; vxattrs[i].name; i++) {
if (!vxattrs[i].hidden &&
if (!(vxattrs[i].flags & VXATTR_FLAG_HIDDEN) &&
!(vxattrs[i].exists_cb &&
!vxattrs[i].exists_cb(ci))) {
len = sprintf(names, "%s", vxattrs[i].name);
......@@ -1024,7 +1028,7 @@ int __ceph_setxattr(struct inode *inode, const char *name,
vxattr = ceph_match_vxattr(inode, name);
if (vxattr) {
if (vxattr->readonly)
if (vxattr->flags & VXATTR_FLAG_READONLY)
return -EOPNOTSUPP;
if (value && !strncmp(vxattr->name, "ceph.quota", 10))
check_realm = true;
......
......@@ -628,6 +628,7 @@ int ceph_flags_to_mode(int flags);
CEPH_CAP_XATTR_SHARED)
#define CEPH_STAT_CAP_INLINE_DATA (CEPH_CAP_FILE_SHARED | \
CEPH_CAP_FILE_RD)
#define CEPH_STAT_RSTAT CEPH_CAP_FILE_WREXTEND
#define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \
CEPH_CAP_LINK_SHARED | \
......
......@@ -170,6 +170,7 @@ struct ceph_osd_request {
u64 r_tid; /* unique for this client */
struct rb_node r_node;
struct rb_node r_mc_node; /* map check */
struct work_struct r_complete_work;
struct ceph_osd *r_osd;
struct ceph_osd_request_target r_t;
......@@ -201,7 +202,6 @@ struct ceph_osd_request {
struct timespec r_mtime; /* ditto */
u64 r_data_offset; /* ditto */
bool r_linger; /* don't resend on failure */
bool r_abort_on_full; /* return ENOSPC when full */
/* internal */
unsigned long r_stamp; /* jiffies, send or check time */
......@@ -347,6 +347,8 @@ struct ceph_osd_client {
struct rb_root linger_map_checks;
atomic_t num_requests;
atomic_t num_homeless;
bool abort_on_full; /* abort w/ ENOSPC when full */
int abort_err;
struct delayed_work timeout_work;
struct delayed_work osds_timeout_work;
#ifdef CONFIG_DEBUG_FS
......@@ -359,6 +361,7 @@ struct ceph_osd_client {
struct ceph_msgpool msgpool_op_reply;
struct workqueue_struct *notify_wq;
struct workqueue_struct *completion_wq;
};
static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
......@@ -378,6 +381,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
struct ceph_msg *msg);
void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
extern void osd_req_op_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode, u32 flags);
......@@ -440,7 +444,7 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
struct page **pages, u64 length,
u32 alignment, bool pages_from_pool,
bool own_pages);
extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
extern int osd_req_op_cls_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode,
const char *class, const char *method);
extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
......
......@@ -279,10 +279,10 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
const struct ceph_osds *new_acting,
bool any_change);
int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
const struct ceph_object_id *oid,
const struct ceph_object_locator *oloc,
struct ceph_pg *raw_pgid);
void __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
const struct ceph_object_id *oid,
const struct ceph_object_locator *oloc,
struct ceph_pg *raw_pgid);
int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
const struct ceph_object_id *oid,
const struct ceph_object_locator *oloc,
......
......@@ -168,12 +168,6 @@ static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2;
static struct lock_class_key socket_class;
#endif
/*
* When skipping (ignoring) a block of input we read it into a "skip
* buffer," which is this many bytes in size.
*/
#define SKIP_BUF_SIZE 1024
static void queue_con(struct ceph_connection *con);
static void cancel_con(struct ceph_connection *con);
static void ceph_con_workfn(struct work_struct *);
......@@ -520,12 +514,18 @@ static int ceph_tcp_connect(struct ceph_connection *con)
return 0;
}
/*
* If @buf is NULL, discard up to @len bytes.
*/
static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
{
struct kvec iov = {buf, len};
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
int r;
if (!buf)
msg.msg_flags |= MSG_TRUNC;
iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
r = sock_recvmsg(sock, &msg, msg.msg_flags);
if (r == -EAGAIN)
......@@ -2575,9 +2575,6 @@ static int try_write(struct ceph_connection *con)
con->state != CON_STATE_OPEN)
return 0;
more:
dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
/* open the socket first? */
if (con->state == CON_STATE_PREOPEN) {
BUG_ON(con->sock);
......@@ -2598,7 +2595,8 @@ static int try_write(struct ceph_connection *con)
}
}
more_kvec:
more:
dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
BUG_ON(!con->sock);
/* kvec data queued? */
......@@ -2623,7 +2621,7 @@ static int try_write(struct ceph_connection *con)
ret = write_partial_message_data(con);
if (ret == 1)
goto more_kvec; /* we need to send the footer, too! */
goto more; /* we need to send the footer, too! */
if (ret == 0)
goto out;
if (ret < 0) {
......@@ -2659,8 +2657,6 @@ static int try_write(struct ceph_connection *con)
return ret;
}
/*
* Read what we can from the socket.
*/
......@@ -2721,16 +2717,11 @@ static int try_read(struct ceph_connection *con)
if (con->in_base_pos < 0) {
/*
* skipping + discarding content.
*
* FIXME: there must be a better way to do this!
*/
static char buf[SKIP_BUF_SIZE];
int skip = min((int) sizeof (buf), -con->in_base_pos);
dout("skipping %d / %d bytes\n", skip, -con->in_base_pos);
ret = ceph_tcp_recvmsg(con->sock, buf, skip);
ret = ceph_tcp_recvmsg(con->sock, NULL, -con->in_base_pos);
if (ret <= 0)
goto out;
dout("skipped %d / %d bytes\n", ret, -con->in_base_pos);
con->in_base_pos += ret;
if (con->in_base_pos)
goto more;
......
This diff is collapsed.
......@@ -2146,10 +2146,10 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
* Should only be called with target_oid and target_oloc (as opposed to
* base_oid and base_oloc), since tiering isn't taken into account.
*/
int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
const struct ceph_object_id *oid,
const struct ceph_object_locator *oloc,
struct ceph_pg *raw_pgid)
void __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
const struct ceph_object_id *oid,
const struct ceph_object_locator *oloc,
struct ceph_pg *raw_pgid)
{
WARN_ON(pi->id != oloc->pool);
......@@ -2165,11 +2165,8 @@ int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
int nsl = oloc->pool_ns->len;
size_t total = nsl + 1 + oid->name_len;
if (total > sizeof(stack_buf)) {
buf = kmalloc(total, GFP_NOIO);
if (!buf)
return -ENOMEM;
}
if (total > sizeof(stack_buf))
buf = kmalloc(total, GFP_NOIO | __GFP_NOFAIL);
memcpy(buf, oloc->pool_ns->str, nsl);
buf[nsl] = '\037';
memcpy(buf + nsl + 1, oid->name, oid->name_len);
......@@ -2181,7 +2178,6 @@ int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
oid->name, nsl, oloc->pool_ns->str,
raw_pgid->pool, raw_pgid->seed);
}
return 0;
}
int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
......@@ -2195,7 +2191,8 @@ int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
if (!pi)
return -ENOENT;
return __ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid);
__ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid);
return 0;
}
EXPORT_SYMBOL(ceph_object_locator_to_pg);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment