Commit 8a05abd0 authored by Linus Torvalds

Merge tag 'ceph-for-5.15-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:

 - a set of patches to address fsync stalls caused by depending on
   periodic rather than triggered MDS journal flushes in some cases
   (Xiubo Li)

 - a fix for mtime effectively not getting updated in case of competing
   writers (Jeff Layton)

 - a couple of fixes for inode reference leaks and various WARNs after
   "umount -f" (Xiubo Li)

 - a new ceph.auth_mds extended attribute (Jeff Layton)

 - a smattering of fixups and cleanups from Jeff, Xiubo and Colin.

* tag 'ceph-for-5.15-rc1' of git://github.com/ceph/ceph-client:
  ceph: fix dereference of null pointer cf
  ceph: drop the mdsc_get_session/put_session dout messages
  ceph: lockdep annotations for try_nonblocking_invalidate
  ceph: don't WARN if we're forcibly removing the session caps
  ceph: don't WARN if we're force umounting
  ceph: remove the capsnaps when removing caps
  ceph: request Fw caps before updating the mtime in ceph_write_iter
  ceph: reconnect to the export targets on new mdsmaps
  ceph: print more information when we can't find snaprealm
  ceph: add ceph_change_snap_realm() helper
  ceph: remove redundant initializations from mdsc and session
  ceph: cancel delayed work instead of flushing on mdsc teardown
  ceph: add a new vxattr to return auth mds for an inode
  ceph: remove some defunct forward declarations
  ceph: flush the mdlog before waiting on unsafe reqs
  ceph: flush mdlog before umounting
  ceph: make iterate_sessions a global symbol
  ceph: make ceph_create_session_msg a global symbol
  ceph: fix comment about short copies in ceph_write_end
  ceph: fix memory leak on decode error in ceph_handle_caps
parents 34c59da4 05a444d3
......@@ -1281,8 +1281,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
inode, page, (int)pos, (int)copied, (int)len);
/* zero the stale part of the page if we did a short copy */
if (!PageUptodate(page)) {
/* just return that nothing was copied on a short copy */
if (copied < len) {
copied = 0;
goto out;
......
......@@ -26,12 +26,6 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci);
void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp);
void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci);
int ceph_readpage_from_fscache(struct inode *inode, struct page *page);
int ceph_readpages_from_fscache(struct inode *inode,
struct address_space *mapping,
struct list_head *pages,
unsigned *nr_pages);
static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
{
ci->fscache = NULL;
......
This diff is collapsed.
......@@ -1722,32 +1722,26 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
}
err = file_remove_privs(file);
if (err)
down_read(&osdc->lock);
map_flags = osdc->osdmap->flags;
pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
up_read(&osdc->lock);
if ((map_flags & CEPH_OSDMAP_FULL) ||
(pool_flags & CEPH_POOL_FLAG_FULL)) {
err = -ENOSPC;
goto out;
}
err = file_update_time(file);
err = file_remove_privs(file);
if (err)
goto out;
inode_inc_iversion_raw(inode);
if (ci->i_inline_version != CEPH_INLINE_NONE) {
err = ceph_uninline_data(file, NULL);
if (err < 0)
goto out;
}
down_read(&osdc->lock);
map_flags = osdc->osdmap->flags;
pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
up_read(&osdc->lock);
if ((map_flags & CEPH_OSDMAP_FULL) ||
(pool_flags & CEPH_POOL_FLAG_FULL)) {
err = -ENOSPC;
goto out;
}
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, count, i_size_read(inode));
if (fi->fmode & CEPH_FILE_MODE_LAZY)
......@@ -1759,6 +1753,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err < 0)
goto out;
err = file_update_time(file);
if (err)
goto out_caps;
inode_inc_iversion_raw(inode);
dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n",
inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
......@@ -1842,6 +1842,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
goto out_unlocked;
out_caps:
ceph_put_cap_refs(ci, got);
out:
if (direct_lock)
ceph_end_io_direct(inode);
......
......@@ -581,16 +581,9 @@ void ceph_evict_inode(struct inode *inode)
*/
if (ci->i_snap_realm) {
if (ceph_snap(inode) == CEPH_NOSNAP) {
struct ceph_snap_realm *realm = ci->i_snap_realm;
dout(" dropping residual ref to snap realm %p\n",
realm);
spin_lock(&realm->inodes_with_caps_lock);
list_del_init(&ci->i_snap_realm_item);
ci->i_snap_realm = NULL;
if (realm->ino == ci->i_vino.ino)
realm->inode = NULL;
spin_unlock(&realm->inodes_with_caps_lock);
ceph_put_snap_realm(mdsc, realm);
ci->i_snap_realm);
ceph_change_snap_realm(inode, NULL);
} else {
ceph_put_snapid_map(mdsc, ci->i_snapid_map);
ci->i_snap_realm = NULL;
......
This diff is collapsed.
......@@ -522,6 +522,11 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
kref_put(&req->r_kref, ceph_mdsc_release_request);
}
extern void send_flush_mdlog(struct ceph_mds_session *s);
extern void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc,
void (*cb)(struct ceph_mds_session *),
bool check_state);
extern struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq);
extern void __ceph_queue_cap_release(struct ceph_mds_session *session,
struct ceph_cap *cap);
extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
......
......@@ -122,6 +122,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
int err;
u8 mdsmap_v;
u16 mdsmap_ev;
u32 target;
m = kzalloc(sizeof(*m), GFP_NOFS);
if (!m)
......@@ -260,9 +261,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
sizeof(u32), GFP_NOFS);
if (!info->export_targets)
goto nomem;
for (j = 0; j < num_export_targets; j++)
info->export_targets[j] =
ceph_decode_32(&pexport_targets);
for (j = 0; j < num_export_targets; j++) {
target = ceph_decode_32(&pexport_targets);
if (target >= m->possible_max_rank) {
err = -EIO;
goto corrupt;
}
info->export_targets[j] = target;
}
} else {
info->export_targets = NULL;
}
......
......@@ -302,6 +302,8 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
if (!m)
return;
cancel_delayed_work_sync(&m->delayed_work);
percpu_counter_destroy(&m->total_inodes);
percpu_counter_destroy(&m->opened_inodes);
percpu_counter_destroy(&m->i_caps_mis);
......@@ -309,8 +311,6 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
percpu_counter_destroy(&m->d_lease_mis);
percpu_counter_destroy(&m->d_lease_hit);
cancel_delayed_work_sync(&m->delayed_work);
ceph_put_mds_session(m->session);
}
......
......@@ -849,6 +849,43 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
dout("flush_snaps done\n");
}
/**
 * ceph_change_snap_realm - move an inode from one snap realm to another
 * @inode: inode whose snap realm membership is being changed
 * @realm: realm to attach the inode to, or NULL to only detach it
 *
 * If the inode currently belongs to a snap realm, it is unlinked from
 * that realm's inodes_with_caps list and the reference the inode held
 * on that realm is put. If @realm is non-NULL, the inode is linked into
 * it and takes over the caller's reference, so no additional reference
 * is acquired here.
 *
 * The caller must hold the inode's i_ceph_lock.
 */
void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_snap_realm *prev = ci->i_snap_realm;
	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;

	lockdep_assert_held(&ci->i_ceph_lock);

	/* Step 1: leave the realm we are currently a member of, if any. */
	if (prev != NULL) {
		spin_lock(&prev->inodes_with_caps_lock);
		list_del_init(&ci->i_snap_realm_item);
		/* The realm is rooted at this inode: clear its back pointer. */
		if (prev->ino == ci->i_vino.ino)
			prev->inode = NULL;
		spin_unlock(&prev->inodes_with_caps_lock);

		/* Drop the reference the inode was holding on the old realm. */
		ceph_put_snap_realm(mdsc, prev);
	}

	ci->i_snap_realm = realm;

	/* Step 2: join the new realm, unless we are only detaching. */
	if (realm == NULL)
		return;

	spin_lock(&realm->inodes_with_caps_lock);
	list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps);
	/* The realm is rooted at this inode: record the back pointer. */
	if (realm->ino == ci->i_vino.ino)
		realm->inode = inode;
	spin_unlock(&realm->inodes_with_caps_lock);
}
/*
* Handle a snap notification from the MDS.
......@@ -935,7 +972,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
};
struct inode *inode = ceph_find_inode(sb, vino);
struct ceph_inode_info *ci;
struct ceph_snap_realm *oldrealm;
if (!inode)
continue;
......@@ -960,27 +996,10 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
}
dout(" will move %p to split realm %llx %p\n",
inode, realm->ino, realm);
/*
* Move the inode to the new realm
*/
oldrealm = ci->i_snap_realm;
spin_lock(&oldrealm->inodes_with_caps_lock);
list_del_init(&ci->i_snap_realm_item);
spin_unlock(&oldrealm->inodes_with_caps_lock);
spin_lock(&realm->inodes_with_caps_lock);
list_add(&ci->i_snap_realm_item,
&realm->inodes_with_caps);
ci->i_snap_realm = realm;
if (realm->ino == ci->i_vino.ino)
realm->inode = inode;
spin_unlock(&realm->inodes_with_caps_lock);
spin_unlock(&ci->i_ceph_lock);
ceph_get_snap_realm(mdsc, realm);
ceph_put_snap_realm(mdsc, oldrealm);
ceph_change_snap_realm(inode, realm);
spin_unlock(&ci->i_ceph_lock);
iput(inode);
continue;
......
......@@ -46,6 +46,7 @@ const char *ceph_session_op_name(int op)
case CEPH_SESSION_FLUSHMSG_ACK: return "flushmsg_ack";
case CEPH_SESSION_FORCE_RO: return "force_ro";
case CEPH_SESSION_REJECT: return "reject";
case CEPH_SESSION_REQUEST_FLUSH_MDLOG: return "flush_mdlog";
}
return "???";
}
......
......@@ -418,7 +418,6 @@ struct ceph_inode_info {
struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */
};
int i_snap_realm_counter; /* snap realm (if caps) */
struct list_head i_snap_realm_item;
struct list_head i_snap_flush_item;
struct timespec64 i_btime;
......@@ -929,6 +928,7 @@ extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
extern int ceph_update_snap_trace(struct ceph_mds_client *m,
void *p, void *e, bool deletion,
struct ceph_snap_realm **realm_ret);
void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm);
extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
struct ceph_msg *msg);
......@@ -1138,6 +1138,7 @@ extern void ceph_add_cap(struct inode *inode,
unsigned cap, unsigned seq, u64 realmino, int flags,
struct ceph_cap **new_cap);
extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
extern void ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
extern void __ceph_remove_caps(struct ceph_inode_info *ci);
extern void ceph_put_cap(struct ceph_mds_client *mdsc,
struct ceph_cap *cap);
......@@ -1163,6 +1164,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc);
extern void __ceph_remove_capsnap(struct inode *inode,
struct ceph_cap_snap *capsnap,
bool *wake_ci, bool *wake_mdsc);
extern void ceph_remove_capsnap(struct inode *inode,
struct ceph_cap_snap *capsnap,
bool *wake_ci, bool *wake_mdsc);
extern void ceph_flush_snaps(struct ceph_inode_info *ci,
struct ceph_mds_session **psession);
extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
......
......@@ -340,6 +340,18 @@ static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
ceph_cap_string(issued), issued);
}
/*
 * getxattr callback that formats, as a decimal integer, the s_mds value
 * of the session behind the inode's auth cap, or -1 when the inode has
 * no auth cap. i_auth_cap is read and formatted under i_ceph_lock.
 *
 * Returns whatever ceph_fmt_xattr() returns for the given buffer.
 */
static ssize_t ceph_vxattrcb_auth_mds(struct ceph_inode_info *ci,
				       char *val, size_t size)
{
	int mds_rank;
	int len;

	spin_lock(&ci->i_ceph_lock);
	if (ci->i_auth_cap)
		mds_rank = ci->i_auth_cap->session->s_mds;
	else
		mds_rank = -1;
	len = ceph_fmt_xattr(val, size, "%d", mds_rank);
	spin_unlock(&ci->i_ceph_lock);

	return len;
}
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
......@@ -473,6 +485,13 @@ static struct ceph_vxattr ceph_common_vxattrs[] = {
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
{
.name = "ceph.auth_mds",
.name_size = sizeof("ceph.auth_mds"),
.getxattr_cb = ceph_vxattrcb_auth_mds,
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
{ .name = NULL, 0 } /* Required table terminator */
};
......
......@@ -299,6 +299,7 @@ enum {
CEPH_SESSION_FLUSHMSG_ACK,
CEPH_SESSION_FORCE_RO,
CEPH_SESSION_REJECT,
CEPH_SESSION_REQUEST_FLUSH_MDLOG,
};
extern const char *ceph_session_op_name(int op);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment