Commit c1b66fcc, authored by Lai Siyao, committed by Greg Kroah-Hartman

staging: lustre: fid: do open-by-fid by default

Currently client open-by-fid often packs name into the request,
but the name may be invalid, e.g. for NFS export, and even if it's
valid, it may cause inconsistency because this operation is done
on this fid, which is globally unique, but the name is not.

Since open-by-fid doesn't pack name, for striped dir we can't know
parent stripe fid on client, so we set parent fid the same as
child fid, and MDT has to find its parent fid from linkea (this is
already supported by MDT).

M_CHECK_STALE becomes obsolete.

Unset MDS_OPEN_FL_INTERNAL from open syscall flags, because these
flags are internally used, and should not be set from user space.

It's not necessary to store parent fid in lli_pfid, because MDT
can get its parent fid from linkea, and now that DNE stripe
directory stores master inode fid in lli_pfid, stop storing parent
fid to avoid conflict.

Signed-off-by: Lai Siyao <lai.siyao@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3544
Reviewed-on: http://review.whamcloud.com/7476
Reviewed-on: http://review.whamcloud.com/10692
Reviewed-by: Fan Yong <fan.yong@intel.com>
Reviewed-by: Nathaniel Clark <nathaniel.l.clark@intel.com>
Reviewed-by: wangdi <di.wang@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent be191af9
...@@ -2252,6 +2252,11 @@ void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa); ...@@ -2252,6 +2252,11 @@ void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
*/ */
#define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */ #define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */
#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \
MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \
MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \
MDS_OPEN_RELEASE)
enum mds_op_bias { enum mds_op_bias {
MDS_CHECK_SPLIT = 1 << 0, MDS_CHECK_SPLIT = 1 << 0,
MDS_CROSS_REF = 1 << 1, MDS_CROSS_REF = 1 << 1,
......
...@@ -42,7 +42,6 @@ ...@@ -42,7 +42,6 @@
#include "obd_class.h" #include "obd_class.h"
#include "lustre_net.h" #include "lustre_net.h"
#include "lustre_mds.h"
#include "lustre_ha.h" #include "lustre_ha.h"
/* 4UL * 1024 * 1024 */ /* 4UL * 1024 * 1024 */
......
...@@ -58,9 +58,6 @@ struct mds_group_info { ...@@ -58,9 +58,6 @@ struct mds_group_info {
#define MDD_OBD_NAME "mdd_obd" #define MDD_OBD_NAME "mdd_obd"
#define MDD_OBD_UUID "mdd_obd_uuid" #define MDD_OBD_UUID "mdd_obd_uuid"
/* these are local flags, used only on the client, private */
#define M_CHECK_STALE 0200000000
/** @} mds */ /** @} mds */
#endif #endif
...@@ -379,53 +379,35 @@ int ll_file_release(struct inode *inode, struct file *file) ...@@ -379,53 +379,35 @@ int ll_file_release(struct inode *inode, struct file *file)
return rc; return rc;
} }
static int ll_intent_file_open(struct dentry *dentry, void *lmm, static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
int lmmsize, struct lookup_intent *itp) struct lookup_intent *itp)
{ {
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(de);
struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_sb_info *sbi = ll_i2sbi(inode);
struct dentry *parent = dentry->d_parent; struct dentry *parent = de->d_parent;
const char *name = dentry->d_name.name; const char *name = NULL;
const int len = dentry->d_name.len;
struct md_op_data *op_data; struct md_op_data *op_data;
struct ptlrpc_request *req; struct ptlrpc_request *req;
__u32 opc = LUSTRE_OPC_ANY; int len = 0, rc;
int rc;
/* Usually we come here only for NFSD, and we want open lock. */ LASSERT(parent);
/* We can also get here if there was cached open handle in revalidate_it LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
* but it disappeared while we were getting from there to ll_file_open.
* But this means this file was closed and immediately opened which /*
* makes a good candidate for using OPEN lock * if server supports open-by-fid, or file name is invalid, don't pack
*/ * name in open request
/* If lmmsize & lmm are not 0, we are just setting stripe info
* parameters. No need for the open lock
*/ */
if (!lmm && lmmsize == 0) { if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
struct ll_dentry_data *ldd = ll_d2d(dentry); lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
/* name = de->d_name.name;
* If we came via ll_iget_for_nfs, then we need to request len = de->d_name.len;
* struct ll_dentry_data *ldd = ll_d2d(file->f_dentry);
*
* NB: when ldd is NULL, it must have come via normal
* lookup path only, since ll_iget_for_nfs always calls
* ll_d_init().
*/
if (ldd && ldd->lld_nfs_dentry) {
ldd->lld_nfs_dentry = 0;
itp->it_flags |= MDS_OPEN_LOCK;
}
if (itp->it_flags & FMODE_WRITE)
opc = LUSTRE_OPC_CREATE;
} }
op_data = ll_prep_md_op_data(NULL, d_inode(parent), op_data = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len,
inode, name, len, O_RDWR, LUSTRE_OPC_ANY, NULL);
O_RDWR, opc, NULL);
if (IS_ERR(op_data)) if (IS_ERR(op_data))
return PTR_ERR(op_data); return PTR_ERR(op_data);
itp->it_flags |= MDS_OPEN_BY_FID;
rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp, rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
0 /*unused */, &req, ll_md_blocking_ast, 0); 0 /*unused */, &req, ll_md_blocking_ast, 0);
ll_finish_md_op_data(op_data); ll_finish_md_op_data(op_data);
...@@ -655,9 +637,19 @@ int ll_file_open(struct inode *inode, struct file *file) ...@@ -655,9 +637,19 @@ int ll_file_open(struct inode *inode, struct file *file)
* result in a deadlock * result in a deadlock
*/ */
mutex_unlock(&lli->lli_och_mutex); mutex_unlock(&lli->lli_och_mutex);
it->it_create_mode |= M_CHECK_STALE; /*
* Normally called under two situations:
* 1. NFS export.
* 2. revalidate with IT_OPEN (revalidate doesn't
* execute this intent any more).
*
* Always fetch MDS_OPEN_LOCK if this is not setstripe.
*
* Always specify MDS_OPEN_BY_FID because we don't want
* to get file with different fid.
*/
it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
rc = ll_intent_file_open(file->f_path.dentry, NULL, 0, it); rc = ll_intent_file_open(file->f_path.dentry, NULL, 0, it);
it->it_create_mode &= ~M_CHECK_STALE;
if (rc) if (rc)
goto out_openerr; goto out_openerr;
...@@ -1399,6 +1391,7 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, ...@@ -1399,6 +1391,7 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
} }
ll_inode_size_lock(inode); ll_inode_size_lock(inode);
oit.it_flags |= MDS_OPEN_BY_FID;
rc = ll_intent_file_open(dentry, lum, lum_size, &oit); rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
if (rc) if (rc)
goto out_unlock; goto out_unlock;
...@@ -3066,7 +3059,6 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) ...@@ -3066,7 +3059,6 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
if (IS_ERR(op_data)) if (IS_ERR(op_data))
return PTR_ERR(op_data); return PTR_ERR(op_data);
oit.it_create_mode |= M_CHECK_STALE;
rc = md_intent_lock(exp, op_data, NULL, 0, rc = md_intent_lock(exp, op_data, NULL, 0,
/* we are not interested in name /* we are not interested in name
* based lookup * based lookup
...@@ -3074,7 +3066,6 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) ...@@ -3074,7 +3066,6 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
&oit, 0, &req, &oit, 0, &req,
ll_md_blocking_ast, 0); ll_md_blocking_ast, 0);
ll_finish_md_op_data(op_data); ll_finish_md_op_data(op_data);
oit.it_create_mode &= ~M_CHECK_STALE;
if (rc < 0) { if (rc < 0) {
rc = ll_inode_revalidate_fini(inode, rc); rc = ll_inode_revalidate_fini(inode, rc);
goto out; goto out;
......
...@@ -118,9 +118,7 @@ struct ll_inode_info { ...@@ -118,9 +118,7 @@ struct ll_inode_info {
/* identifying fields for both metadata and data stacks. */ /* identifying fields for both metadata and data stacks. */
struct lu_fid lli_fid; struct lu_fid lli_fid;
/* Parent fid for accessing default stripe data on parent directory /* master inode fid for stripe directory */
* for allocating OST objects after a mknod() and later open-by-FID.
*/
struct lu_fid lli_pfid; struct lu_fid lli_pfid;
struct list_head lli_close_list; struct list_head lli_close_list;
......
...@@ -189,7 +189,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, ...@@ -189,7 +189,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
OBD_CONNECT_PINGLESS | OBD_CONNECT_PINGLESS |
OBD_CONNECT_MAX_EASIZE | OBD_CONNECT_MAX_EASIZE |
OBD_CONNECT_FLOCK_DEAD | OBD_CONNECT_FLOCK_DEAD |
OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK; OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
OBD_CONNECT_OPEN_BY_FID;
if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
data->ocd_connect_flags |= OBD_CONNECT_SOM; data->ocd_connect_flags |= OBD_CONNECT_SOM;
...@@ -2364,20 +2365,6 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, ...@@ -2364,20 +2365,6 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
op_data->op_mds = 0; op_data->op_mds = 0;
op_data->op_data = data; op_data->op_data = data;
/* If the file is being opened after mknod() (normally due to NFS)
* try to use the default stripe data from parent directory for
* allocating OST objects. Try to pass the parent FID to MDS.
*/
if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) &&
!ll_i2info(i2)->lli_has_smd) {
struct ll_inode_info *lli = ll_i2info(i2);
spin_lock(&lli->lli_lock);
if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid)))
op_data->op_fid1 = lli->lli_pfid;
spin_unlock(&lli->lli_lock);
}
/* When called by ll_setattr_raw, file is i1. */ /* When called by ll_setattr_raw, file is i1. */
if (ll_i2info(i1)->lli_flags & LLIF_DATA_MODIFIED) if (ll_i2info(i1)->lli_flags & LLIF_DATA_MODIFIED)
op_data->op_bias |= MDS_DATA_MODIFIED; op_data->op_bias |= MDS_DATA_MODIFIED;
......
...@@ -148,12 +148,18 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren ...@@ -148,12 +148,18 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren
return ERR_PTR(-ESTALE); return ERR_PTR(-ESTALE);
} }
result = d_obtain_alias(inode);
if (IS_ERR(result)) {
iput(inode);
return result;
}
/** /**
* It is an anonymous dentry without OST objects created yet. * In case d_obtain_alias() found a disconnected dentry, always update
* We have to find the parent to tell MDS how to init lov objects. * lli_pfid to allow later operation (normally open) have parent fid,
* which may be used by MDS to create data.
*/ */
if (S_ISREG(inode->i_mode) && !ll_i2info(inode)->lli_has_smd && if (parent) {
parent && !fid_is_zero(parent)) {
struct ll_inode_info *lli = ll_i2info(inode); struct ll_inode_info *lli = ll_i2info(inode);
spin_lock(&lli->lli_lock); spin_lock(&lli->lli_lock);
......
...@@ -650,6 +650,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, ...@@ -650,6 +650,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
} }
it->it_create_mode = (mode & S_IALLUGO) | S_IFREG; it->it_create_mode = (mode & S_IALLUGO) | S_IFREG;
it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags); it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags);
it->it_flags &= ~MDS_OPEN_FL_INTERNAL;
/* Dentry added to dcache tree in ll_lookup_it */ /* Dentry added to dcache tree in ll_lookup_it */
de = ll_lookup_it(dir, dentry, it, lookup_flags); de = ll_lookup_it(dir, dentry, it, lookup_flags);
......
...@@ -111,10 +111,6 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm, ...@@ -111,10 +111,6 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
*/ */
LASSERT(it->it_op & IT_OPEN); LASSERT(it->it_op & IT_OPEN);
op_data->op_fid2 = *parent_fid; op_data->op_fid2 = *parent_fid;
/* Add object FID to op_fid3, in case it needs to check stale
* (M_CHECK_STALE), see mdc_finish_intent_lock
*/
op_data->op_fid3 = body->mbo_fid1;
} }
op_data->op_bias = MDS_CROSS_REF; op_data->op_bias = MDS_CROSS_REF;
...@@ -313,17 +309,16 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, ...@@ -313,17 +309,16 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
struct mdt_body *body; struct mdt_body *body;
int rc; int rc;
if (it->it_flags & MDS_OPEN_BY_FID && fid_is_sane(&op_data->op_fid2)) { if (it->it_flags & MDS_OPEN_BY_FID) {
if (op_data->op_mea1) { LASSERT(fid_is_sane(&op_data->op_fid2));
struct lmv_stripe_md *lsm = op_data->op_mea1;
const struct lmv_oinfo *oinfo;
oinfo = lsm_name_to_stripe_info(lsm, op_data->op_name, /*
op_data->op_namelen); * for striped directory, we can't know parent stripe fid
if (IS_ERR(oinfo)) * without name, but we can set it to child fid, and MDT
return PTR_ERR(oinfo); * will obtain it from linkea in open in such case.
op_data->op_fid1 = oinfo->lmo_fid; */
} if (op_data->op_mea1)
op_data->op_fid1 = op_data->op_fid2;
tgt = lmv_find_target(lmv, &op_data->op_fid2); tgt = lmv_find_target(lmv, &op_data->op_fid2);
if (IS_ERR(tgt)) if (IS_ERR(tgt))
...@@ -331,6 +326,10 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, ...@@ -331,6 +326,10 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
op_data->op_mds = tgt->ltd_idx; op_data->op_mds = tgt->ltd_idx;
} else { } else {
LASSERT(fid_is_sane(&op_data->op_fid1));
LASSERT(fid_is_zero(&op_data->op_fid2));
LASSERT(op_data->op_name);
tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
if (IS_ERR(tgt)) if (IS_ERR(tgt))
return PTR_ERR(tgt); return PTR_ERR(tgt);
...@@ -339,13 +338,11 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, ...@@ -339,13 +338,11 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
/* If it is ready to open the file by FID, do not need /* If it is ready to open the file by FID, do not need
* allocate FID at all, otherwise it will confuse MDT * allocate FID at all, otherwise it will confuse MDT
*/ */
if ((it->it_op & IT_CREAT) && if ((it->it_op & IT_CREAT) && !(it->it_flags & MDS_OPEN_BY_FID)) {
!(it->it_flags & MDS_OPEN_BY_FID)) {
/* /*
* For open with IT_CREATE and for IT_CREATE cases allocate new * For lookup(IT_CREATE) cases allocate new fid and setup FLD
* fid and setup FLD for it. * for it.
*/ */
op_data->op_fid3 = op_data->op_fid2;
rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
if (rc != 0) if (rc != 0)
return rc; return rc;
...@@ -494,9 +491,9 @@ int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data, ...@@ -494,9 +491,9 @@ int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
LASSERT(fid_is_sane(&op_data->op_fid1)); LASSERT(fid_is_sane(&op_data->op_fid1));
CDEBUG(D_INODE, "INTENT LOCK '%s' for '%*s' on "DFID"\n", CDEBUG(D_INODE, "INTENT LOCK '%s' for "DFID" '%*s' on "DFID"\n",
LL_IT2STR(it), op_data->op_namelen, op_data->op_name, LL_IT2STR(it), PFID(&op_data->op_fid2), op_data->op_namelen,
PFID(&op_data->op_fid1)); op_data->op_name, PFID(&op_data->op_fid1));
rc = lmv_check_connect(obd); rc = lmv_check_connect(obd);
if (rc) if (rc)
......
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
#define _MDC_INTERNAL_H #define _MDC_INTERNAL_H
#include "../include/lustre_mdc.h" #include "../include/lustre_mdc.h"
#include "../include/lustre_mds.h"
void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars); void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars);
......
...@@ -171,10 +171,7 @@ void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data, ...@@ -171,10 +171,7 @@ void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
static __u64 mds_pack_open_flags(__u64 flags, __u32 mode) static __u64 mds_pack_open_flags(__u64 flags, __u32 mode)
{ {
__u64 cr_flags = (flags & (FMODE_READ | FMODE_WRITE | __u64 cr_flags = (flags & (FMODE_READ | FMODE_WRITE |
MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | MDS_OPEN_FL_INTERNAL));
MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK |
MDS_OPEN_BY_FID | MDS_OPEN_LEASE |
MDS_OPEN_RELEASE));
if (flags & O_CREAT) if (flags & O_CREAT)
cr_flags |= MDS_OPEN_CREAT; cr_flags |= MDS_OPEN_CREAT;
if (flags & O_EXCL) if (flags & O_EXCL)
......
...@@ -922,27 +922,6 @@ static int mdc_finish_intent_lock(struct obd_export *exp, ...@@ -922,27 +922,6 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
mdt_body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); mdt_body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
LASSERT(mdt_body); /* mdc_enqueue checked */ LASSERT(mdt_body); /* mdc_enqueue checked */
/* If we were revalidating a fid/name pair, mark the intent in
* case we fail and get called again from lookup
*/
if (fid_is_sane(&op_data->op_fid2) &&
it->it_create_mode & M_CHECK_STALE &&
it->it_op != IT_GETATTR) {
/* Also: did we find the same inode? */
/* sever can return one of two fids:
* op_fid2 - new allocated fid - if file is created.
* op_fid3 - existent fid - if file only open.
* op_fid3 is saved in lmv_intent_open
*/
if ((!lu_fid_eq(&op_data->op_fid2, &mdt_body->mbo_fid1)) &&
(!lu_fid_eq(&op_data->op_fid3, &mdt_body->mbo_fid1))) {
CDEBUG(D_DENTRY, "Found stale data "DFID"("DFID")/"DFID
"\n", PFID(&op_data->op_fid2),
PFID(&op_data->op_fid2), PFID(&mdt_body->mbo_fid1));
return -ESTALE;
}
}
rc = it_open_error(DISP_LOOKUP_EXECD, it); rc = it_open_error(DISP_LOOKUP_EXECD, it);
if (rc) if (rc)
return rc; return rc;
......
...@@ -96,7 +96,7 @@ static const char * const obd_connect_names[] = { ...@@ -96,7 +96,7 @@ static const char * const obd_connect_names[] = {
"pingless", "pingless",
"flock_deadlock", "flock_deadlock",
"disp_stripe", "disp_stripe",
"unknown", "open_by_fid",
"lfsck", "lfsck",
"unknown", "unknown",
NULL NULL
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment