Commit b38c9eb4, authored by Xiubo Li and committed by Ilya Dryomov

ceph: add possible_max_rank and make the code more readable

The m_num_mds field is actually the number of MDSs that are in
up:active state, which merely duplicates m_num_active_mds,
so remove it.

Add possible_max_rank to the mdsmap struct; this is the correct
boundary for the largest possible rank.

Remove the special case for a single MDS in __mdsmap_get_random_mds(),
because the valid MDS rank may not always be 0.
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent 0eb30853
...@@ -33,7 +33,7 @@ static int mdsmap_show(struct seq_file *s, void *p) ...@@ -33,7 +33,7 @@ static int mdsmap_show(struct seq_file *s, void *p)
seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds); seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds);
seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout); seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout);
seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose); seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose);
for (i = 0; i < mdsmap->m_num_mds; i++) { for (i = 0; i < mdsmap->possible_max_rank; i++) {
struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr; struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
int state = mdsmap->m_info[i].state; int state = mdsmap->m_info[i].state;
seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
......
...@@ -598,7 +598,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, ...@@ -598,7 +598,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
{ {
struct ceph_mds_session *s; struct ceph_mds_session *s;
if (mds >= mdsc->mdsmap->m_num_mds) if (mds >= mdsc->mdsmap->possible_max_rank)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
s = kzalloc(sizeof(*s), GFP_NOFS); s = kzalloc(sizeof(*s), GFP_NOFS);
...@@ -1231,7 +1231,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc, ...@@ -1231,7 +1231,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
struct ceph_mds_session *ts; struct ceph_mds_session *ts;
int i, mds = session->s_mds; int i, mds = session->s_mds;
if (mds >= mdsc->mdsmap->m_num_mds) if (mds >= mdsc->mdsmap->possible_max_rank)
return; return;
mi = &mdsc->mdsmap->m_info[mds]; mi = &mdsc->mdsmap->m_info[mds];
...@@ -3785,7 +3785,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, ...@@ -3785,7 +3785,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
dout("check_new_map new %u old %u\n", dout("check_new_map new %u old %u\n",
newmap->m_epoch, oldmap->m_epoch); newmap->m_epoch, oldmap->m_epoch);
for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) { for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) {
if (!mdsc->sessions[i]) if (!mdsc->sessions[i])
continue; continue;
s = mdsc->sessions[i]; s = mdsc->sessions[i];
...@@ -3799,7 +3799,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, ...@@ -3799,7 +3799,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
ceph_session_state_name(s->s_state)); ceph_session_state_name(s->s_state));
if (i >= newmap->m_num_mds) { if (i >= newmap->possible_max_rank) {
/* force close session for stopped mds */ /* force close session for stopped mds */
get_session(s); get_session(s);
__unregister_session(mdsc, s); __unregister_session(mdsc, s);
...@@ -3856,7 +3856,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, ...@@ -3856,7 +3856,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
} }
} }
for (i = 0; i < newmap->m_num_mds && i < mdsc->max_sessions; i++) { for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) {
s = mdsc->sessions[i]; s = mdsc->sessions[i];
if (!s) if (!s)
continue; continue;
......
...@@ -14,22 +14,15 @@ ...@@ -14,22 +14,15 @@
#include "super.h" #include "super.h"
#define CEPH_MDS_IS_READY(i, ignore_laggy) \ #define CEPH_MDS_IS_READY(i, ignore_laggy) \
(m->m_info[i].state > 0 && (ignore_laggy ? true : !m->m_info[i].laggy)) (m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy)
static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
{ {
int n = 0; int n = 0;
int i, j; int i, j;
/*
* special case for one mds, no matter it is laggy or
* not we have no choice
*/
if (1 == m->m_num_mds && m->m_info[0].state > 0)
return 0;
/* count */ /* count */
for (i = 0; i < m->m_num_mds; i++) for (i = 0; i < m->possible_max_rank; i++)
if (CEPH_MDS_IS_READY(i, ignore_laggy)) if (CEPH_MDS_IS_READY(i, ignore_laggy))
n++; n++;
if (n == 0) if (n == 0)
...@@ -37,7 +30,7 @@ static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) ...@@ -37,7 +30,7 @@ static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
/* pick */ /* pick */
n = prandom_u32() % n; n = prandom_u32() % n;
for (j = 0, i = 0; i < m->m_num_mds; i++) { for (j = 0, i = 0; i < m->possible_max_rank; i++) {
if (CEPH_MDS_IS_READY(i, ignore_laggy)) if (CEPH_MDS_IS_READY(i, ignore_laggy))
j++; j++;
if (j > n) if (j > n)
...@@ -55,10 +48,10 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) ...@@ -55,10 +48,10 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
int mds; int mds;
mds = __mdsmap_get_random_mds(m, false); mds = __mdsmap_get_random_mds(m, false);
if (mds == m->m_num_mds || mds == -1) if (mds == m->possible_max_rank || mds == -1)
mds = __mdsmap_get_random_mds(m, true); mds = __mdsmap_get_random_mds(m, true);
return mds == m->m_num_mds ? -1 : mds; return mds == m->possible_max_rank ? -1 : mds;
} }
#define __decode_and_drop_type(p, end, type, bad) \ #define __decode_and_drop_type(p, end, type, bad) \
...@@ -129,7 +122,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -129,7 +122,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
int err; int err;
u8 mdsmap_v, mdsmap_cv; u8 mdsmap_v, mdsmap_cv;
u16 mdsmap_ev; u16 mdsmap_ev;
u32 possible_max_rank;
m = kzalloc(sizeof(*m), GFP_NOFS); m = kzalloc(sizeof(*m), GFP_NOFS);
if (!m) if (!m)
...@@ -157,24 +149,23 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -157,24 +149,23 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
m->m_max_mds = ceph_decode_32(p); m->m_max_mds = ceph_decode_32(p);
/* /*
* pick out the active nodes as the m_num_mds, the m_num_mds * pick out the active nodes as the m_num_active_mds, the
* maybe larger than m_max_mds when decreasing the max_mds in * m_num_active_mds maybe larger than m_max_mds when decreasing
* cluster side, in other case it should less than or equal * the max_mds in cluster side, in other case it should less
* to m_max_mds. * than or equal to m_max_mds.
*/ */
m->m_num_mds = n = ceph_decode_32(p); m->m_num_active_mds = n = ceph_decode_32(p);
m->m_num_active_mds = m->m_num_mds;
/* /*
* the possible max rank, it maybe larger than the m->m_num_mds, * the possible max rank, it maybe larger than the m_num_active_mds,
* for example if the mds_max == 2 in the cluster, when the MDS(0) * for example if the mds_max == 2 in the cluster, when the MDS(0)
* was laggy and being replaced by a new MDS, we will temporarily * was laggy and being replaced by a new MDS, we will temporarily
* receive a new mds map with n_num_mds == 1 and the active MDS(1), * receive a new mds map with n_num_mds == 1 and the active MDS(1),
* and the mds rank >= m->m_num_mds. * and the mds rank >= m_num_active_mds.
*/ */
possible_max_rank = max((u32)m->m_num_mds, m->m_max_mds); m->possible_max_rank = max(m->m_num_active_mds, m->m_max_mds);
m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS); m->m_info = kcalloc(m->possible_max_rank, sizeof(*m->m_info), GFP_NOFS);
if (!m->m_info) if (!m->m_info)
goto nomem; goto nomem;
...@@ -248,7 +239,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -248,7 +239,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
ceph_mds_state_name(state), ceph_mds_state_name(state),
laggy ? "(laggy)" : ""); laggy ? "(laggy)" : "");
if (mds < 0 || mds >= possible_max_rank) { if (mds < 0 || mds >= m->possible_max_rank) {
pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds); pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds);
continue; continue;
} }
...@@ -318,14 +309,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -318,14 +309,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
s32 mds = ceph_decode_32(p); s32 mds = ceph_decode_32(p);
if (mds >= 0 && mds < m->m_num_mds) { if (mds >= 0 && mds < m->possible_max_rank) {
if (m->m_info[mds].laggy) if (m->m_info[mds].laggy)
num_laggy++; num_laggy++;
} }
} }
m->m_num_laggy = num_laggy; m->m_num_laggy = num_laggy;
if (n > m->m_num_mds) { if (n > m->possible_max_rank) {
void *new_m_info = krealloc(m->m_info, void *new_m_info = krealloc(m->m_info,
n * sizeof(*m->m_info), n * sizeof(*m->m_info),
GFP_NOFS | __GFP_ZERO); GFP_NOFS | __GFP_ZERO);
...@@ -333,7 +324,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -333,7 +324,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
goto nomem; goto nomem;
m->m_info = new_m_info; m->m_info = new_m_info;
} }
m->m_num_mds = n; m->possible_max_rank = n;
} }
/* inc */ /* inc */
...@@ -404,7 +395,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m) ...@@ -404,7 +395,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
{ {
int i; int i;
for (i = 0; i < m->m_num_mds; i++) for (i = 0; i < m->possible_max_rank; i++)
kfree(m->m_info[i].export_targets); kfree(m->m_info[i].export_targets);
kfree(m->m_info); kfree(m->m_info);
kfree(m->m_data_pg_pools); kfree(m->m_data_pg_pools);
...@@ -420,7 +411,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m) ...@@ -420,7 +411,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
return false; return false;
if (m->m_num_laggy == m->m_num_active_mds) if (m->m_num_laggy == m->m_num_active_mds)
return false; return false;
for (i = 0; i < m->m_num_mds; i++) { for (i = 0; i < m->possible_max_rank; i++) {
if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE) if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
nr_active++; nr_active++;
} }
......
...@@ -26,8 +26,8 @@ struct ceph_mdsmap { ...@@ -26,8 +26,8 @@ struct ceph_mdsmap {
u32 m_session_autoclose; /* seconds */ u32 m_session_autoclose; /* seconds */
u64 m_max_file_size; u64 m_max_file_size;
u32 m_max_mds; /* expected up:active mds number */ u32 m_max_mds; /* expected up:active mds number */
int m_num_active_mds; /* actual up:active mds number */ u32 m_num_active_mds; /* actual up:active mds number */
int m_num_mds; /* size of m_info array */ u32 possible_max_rank; /* possible max rank index */
struct ceph_mds_info *m_info; struct ceph_mds_info *m_info;
/* which object pools file data can be stored in */ /* which object pools file data can be stored in */
...@@ -43,7 +43,7 @@ struct ceph_mdsmap { ...@@ -43,7 +43,7 @@ struct ceph_mdsmap {
static inline struct ceph_entity_addr * static inline struct ceph_entity_addr *
ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
{ {
if (w >= m->m_num_mds) if (w >= m->possible_max_rank)
return NULL; return NULL;
return &m->m_info[w].addr; return &m->m_info[w].addr;
} }
...@@ -51,14 +51,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) ...@@ -51,14 +51,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
{ {
BUG_ON(w < 0); BUG_ON(w < 0);
if (w >= m->m_num_mds) if (w >= m->possible_max_rank)
return CEPH_MDS_STATE_DNE; return CEPH_MDS_STATE_DNE;
return m->m_info[w].state; return m->m_info[w].state;
} }
static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
{ {
if (w >= 0 && w < m->m_num_mds) if (w >= 0 && w < m->possible_max_rank)
return m->m_info[w].laggy; return m->m_info[w].laggy;
return false; return false;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment