Commit 79162547 authored by Yan, Zheng; committed by Ilya Dryomov

ceph: make seeky readdir more efficient

The current cephfs client uses a string to indicate the start position of
readdir. The string is the last entry of the previous readdir reply.
This approach does not work for seeky readdir because we cannot
easily convert the new position to a string. For seeky readdir, the
mds needs to return dentries from the beginning. The client keeps
retrying if the reply does not contain the dentry it wants.

In the current version of ceph, the mds sorts CDentry in its cache in
hash order. The client also uses the dentry hash to compose the dir position.
For seeky readdir, if the client passes the hash part of the dir position
to the mds, the mds can avoid replying with useless dentries.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent 2827528d
...@@ -378,7 +378,11 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -378,7 +378,11 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
return -ENOMEM; return -ENOMEM;
} }
} else if (is_hash_order(ctx->pos)) {
req->r_args.readdir.offset_hash =
cpu_to_le32(fpos_hash(ctx->pos));
} }
req->r_dir_release_cnt = fi->dir_release_count; req->r_dir_release_cnt = fi->dir_release_count;
req->r_dir_ordered_cnt = fi->dir_ordered_count; req->r_dir_ordered_cnt = fi->dir_ordered_count;
req->r_readdir_cache_idx = fi->readdir_cache_idx; req->r_readdir_cache_idx = fi->readdir_cache_idx;
......
...@@ -1482,10 +1482,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ...@@ -1482,10 +1482,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
return readdir_prepopulate_inodes_only(req, session); return readdir_prepopulate_inodes_only(req, session);
if (rinfo->hash_order && req->r_path2) { if (rinfo->hash_order) {
if (req->r_path2) {
last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash, last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
req->r_path2, strlen(req->r_path2)); req->r_path2,
strlen(req->r_path2));
last_hash = ceph_frag_value(last_hash); last_hash = ceph_frag_value(last_hash);
} else if (rinfo->offset_hash) {
/* mds understands offset_hash */
WARN_ON_ONCE(req->r_readdir_offset != 2);
last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
}
} }
if (rinfo->dir_dir && if (rinfo->dir_dir &&
...@@ -1510,7 +1517,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ...@@ -1510,7 +1517,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
} }
if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 && if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 &&
!(rinfo->hash_order && req->r_path2)) { !(rinfo->hash_order && last_hash)) {
/* note dir version at start of readdir so we can tell /* note dir version at start of readdir so we can tell
* if any dentries get dropped */ * if any dentries get dropped */
req->r_dir_release_cnt = atomic64_read(&ci->i_release_count); req->r_dir_release_cnt = atomic64_read(&ci->i_release_count);
......
...@@ -189,6 +189,7 @@ static int parse_reply_info_dir(void **p, void *end, ...@@ -189,6 +189,7 @@ static int parse_reply_info_dir(void **p, void *end,
info->dir_end = !!(flags & CEPH_READDIR_FRAG_END); info->dir_end = !!(flags & CEPH_READDIR_FRAG_END);
info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE); info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE);
info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER); info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER);
info->offset_hash = !!(flags & CEPH_READDIR_OFFSET_HASH);
} }
if (num == 0) if (num == 0)
goto done; goto done;
......
...@@ -83,9 +83,10 @@ struct ceph_mds_reply_info_parsed { ...@@ -83,9 +83,10 @@ struct ceph_mds_reply_info_parsed {
struct ceph_mds_reply_dirfrag *dir_dir; struct ceph_mds_reply_dirfrag *dir_dir;
size_t dir_buf_size; size_t dir_buf_size;
int dir_nr; int dir_nr;
bool dir_complete;
bool dir_end; bool dir_end;
bool dir_complete;
bool hash_order; bool hash_order;
bool offset_hash;
struct ceph_mds_reply_dir_entry *dir_entries; struct ceph_mds_reply_dir_entry *dir_entries;
}; };
......
...@@ -365,6 +365,7 @@ extern const char *ceph_mds_op_name(int op); ...@@ -365,6 +365,7 @@ extern const char *ceph_mds_op_name(int op);
#define CEPH_READDIR_FRAG_END (1<<0) #define CEPH_READDIR_FRAG_END (1<<0)
#define CEPH_READDIR_FRAG_COMPLETE (1<<8) #define CEPH_READDIR_FRAG_COMPLETE (1<<8)
#define CEPH_READDIR_HASH_ORDER (1<<9) #define CEPH_READDIR_HASH_ORDER (1<<9)
#define CEPH_READDIR_OFFSET_HASH (1<<10)
union ceph_mds_request_args { union ceph_mds_request_args {
struct { struct {
...@@ -384,6 +385,7 @@ union ceph_mds_request_args { ...@@ -384,6 +385,7 @@ union ceph_mds_request_args {
__le32 max_entries; /* how many dentries to grab */ __le32 max_entries; /* how many dentries to grab */
__le32 max_bytes; __le32 max_bytes;
__le16 flags; __le16 flags;
__le32 offset_hash;
} __attribute__ ((packed)) readdir; } __attribute__ ((packed)) readdir;
struct { struct {
__le32 mode; __le32 mode;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment