Commit 543212b3 authored by Yan, Zheng's avatar Yan, Zheng Committed by Ilya Dryomov

ceph: close race between d_name_cmp() and update_dentry_lease()

d_name_cmp() and update_dentry_lease() lock and unlock dentry->d_lock
respectively. Dentry may get renamed between them. The fix is moving
the dentry name compare into update_dentry_lease().

This patch introduce two version of update_dentry_lease(). One version
is for the case that parent inode is locked. It does not need to check
parent/target inode and dentry name. Another version is for the case
that parent inode is not locked. It checks parent/target inode and
dentry name after locking dentry->d_lock.
Signed-off-by: default avatar"Yan, Zheng" <zyan@redhat.com>
Reviewed-by: default avatarJeff Layton <jlayton@redhat.com>
Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
parent 74960773
......@@ -1028,59 +1028,38 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
}
/*
* caller should hold session s_mutex.
* caller should hold session s_mutex and dentry->d_lock.
*/
static void update_dentry_lease(struct dentry *dentry,
struct ceph_mds_reply_lease *lease,
struct ceph_mds_session *session,
unsigned long from_time,
struct ceph_vino *tgt_vino,
struct ceph_vino *dir_vino)
static void __update_dentry_lease(struct inode *dir, struct dentry *dentry,
struct ceph_mds_reply_lease *lease,
struct ceph_mds_session *session,
unsigned long from_time,
struct ceph_mds_session **old_lease_session)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
long unsigned duration = le32_to_cpu(lease->duration_ms);
long unsigned ttl = from_time + (duration * HZ) / 1000;
long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
struct inode *dir;
struct ceph_mds_session *old_lease_session = NULL;
/*
* Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
* we expect a negative dentry.
*/
if (!tgt_vino && d_really_is_positive(dentry))
return;
if (tgt_vino && (d_really_is_negative(dentry) ||
!ceph_ino_compare(d_inode(dentry), tgt_vino)))
return;
spin_lock(&dentry->d_lock);
dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
dentry, duration, ttl);
dir = d_inode(dentry->d_parent);
/* make sure parent matches dir_vino */
if (!ceph_ino_compare(dir, dir_vino))
goto out_unlock;
/* only track leases on regular dentries */
if (ceph_snap(dir) != CEPH_NOSNAP)
goto out_unlock;
return;
di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen);
if (duration == 0) {
__ceph_dentry_dir_lease_touch(di);
goto out_unlock;
return;
}
if (di->lease_gen == session->s_cap_gen &&
time_before(ttl, di->time))
goto out_unlock; /* we already have a newer lease. */
return; /* we already have a newer lease. */
if (di->lease_session && di->lease_session != session) {
old_lease_session = di->lease_session;
*old_lease_session = di->lease_session;
di->lease_session = NULL;
}
......@@ -1093,6 +1072,62 @@ static void update_dentry_lease(struct dentry *dentry,
di->time = ttl;
__ceph_dentry_lease_touch(di);
}
static inline void update_dentry_lease(struct inode *dir, struct dentry *dentry,
struct ceph_mds_reply_lease *lease,
struct ceph_mds_session *session,
unsigned long from_time)
{
struct ceph_mds_session *old_lease_session = NULL;
spin_lock(&dentry->d_lock);
__update_dentry_lease(dir, dentry, lease, session, from_time,
&old_lease_session);
spin_unlock(&dentry->d_lock);
if (old_lease_session)
ceph_put_mds_session(old_lease_session);
}
/*
* update dentry lease without having parent inode locked
*/
static void update_dentry_lease_careful(struct dentry *dentry,
struct ceph_mds_reply_lease *lease,
struct ceph_mds_session *session,
unsigned long from_time,
char *dname, u32 dname_len,
struct ceph_vino *pdvino,
struct ceph_vino *ptvino)
{
struct inode *dir;
struct ceph_mds_session *old_lease_session = NULL;
spin_lock(&dentry->d_lock);
/* make sure dentry's name matches target */
if (dentry->d_name.len != dname_len ||
memcmp(dentry->d_name.name, dname, dname_len))
goto out_unlock;
dir = d_inode(dentry->d_parent);
/* make sure parent matches dvino */
if (!ceph_ino_compare(dir, pdvino))
goto out_unlock;
/* make sure dentry's inode matches target. NULL ptvino means that
* we expect a negative dentry */
if (ptvino) {
if (d_really_is_negative(dentry))
goto out_unlock;
if (!ceph_ino_compare(d_inode(dentry), ptvino))
goto out_unlock;
} else {
if (d_really_is_positive(dentry))
goto out_unlock;
}
__update_dentry_lease(dir, dentry, lease, session,
from_time, &old_lease_session);
out_unlock:
spin_unlock(&dentry->d_lock);
if (old_lease_session)
......@@ -1157,19 +1192,6 @@ static int splice_dentry(struct dentry **pdn, struct inode *in)
return 0;
}
static int d_name_cmp(struct dentry *dentry, const char *name, size_t len)
{
int ret;
/* take d_lock to ensure dentry->d_name stability */
spin_lock(&dentry->d_lock);
ret = dentry->d_name.len - len;
if (!ret)
ret = memcmp(dentry->d_name.name, name, len);
spin_unlock(&dentry->d_lock);
return ret;
}
/*
* Incorporate results into the local cache. This is either just
* one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
......@@ -1372,10 +1394,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
} else if (have_lease) {
if (d_unhashed(dn))
d_add(dn, NULL);
update_dentry_lease(dn, rinfo->dlease,
session,
req->r_request_started,
NULL, &dvino);
update_dentry_lease(dir, dn,
rinfo->dlease, session,
req->r_request_started);
}
goto done;
}
......@@ -1397,11 +1418,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
}
if (have_lease) {
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
update_dentry_lease(dn, rinfo->dlease, session,
req->r_request_started,
&tvino, &dvino);
update_dentry_lease(dir, dn,
rinfo->dlease, session,
req->r_request_started);
}
dout(" final dn %p\n", dn);
} else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
......@@ -1419,27 +1438,20 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
err = splice_dentry(&req->r_dentry, in);
if (err < 0)
goto done;
} else if (rinfo->head->is_dentry &&
!d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) {
} else if (rinfo->head->is_dentry && req->r_dentry) {
/* parent inode is not locked, be carefull */
struct ceph_vino *ptvino = NULL;
if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
le32_to_cpu(rinfo->dlease->duration_ms)) {
dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
if (rinfo->head->is_target) {
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
ptvino = &tvino;
}
update_dentry_lease(req->r_dentry, rinfo->dlease,
session, req->r_request_started, ptvino,
&dvino);
} else {
dout("%s: no dentry lease or dir cap\n", __func__);
dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
if (rinfo->head->is_target) {
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
ptvino = &tvino;
}
update_dentry_lease_careful(req->r_dentry, rinfo->dlease,
session, req->r_request_started,
rinfo->dname, rinfo->dname_len,
&dvino, ptvino);
}
done:
dout("fill_trace done err=%d\n", err);
......@@ -1601,7 +1613,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
/* FIXME: release caps/leases if error occurs */
for (i = 0; i < rinfo->dir_nr; i++) {
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
struct ceph_vino tvino, dvino;
struct ceph_vino tvino;
dname.name = rde->name;
dname.len = rde->name_len;
......@@ -1702,9 +1714,9 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
ceph_dentry(dn)->offset = rde->offset;
dvino = ceph_vino(d_inode(parent));
update_dentry_lease(dn, rde->lease, req->r_session,
req->r_request_started, &tvino, &dvino);
update_dentry_lease(d_inode(parent), dn,
rde->lease, req->r_session,
req->r_request_started);
if (err == 0 && skipped == 0 && cache_ctl.index >= 0) {
ret = fill_readdir_cache(d_inode(parent), dn,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment