Commit c63716ab authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "A bunch of fixes and follow-ups for -rc1 Luminous patches: issues with
  ->reencode_message() and last minute RADOS semantic changes in
  v12.1.2"

* tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client:
  libceph: make RECOVERY_DELETES feature create a new interval
  libceph: upmap semantic changes
  crush: assume weight_set != null implies weight_set_size > 0
  libceph: fallback for when there isn't a pool-specific choose_arg
  libceph: don't call ->reencode_message() more than once per message
  libceph: make encode_request_*() work with r_mempool requests
parents a64c40e7 ae78dd81
...@@ -148,6 +148,7 @@ struct ceph_osd_request_target { ...@@ -148,6 +148,7 @@ struct ceph_osd_request_target {
int size; int size;
int min_size; int min_size;
bool sort_bitwise; bool sort_bitwise;
bool recovery_deletes;
unsigned int flags; /* CEPH_OSD_FLAG_* */ unsigned int flags; /* CEPH_OSD_FLAG_* */
bool paused; bool paused;
......
...@@ -272,6 +272,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, ...@@ -272,6 +272,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
u32 new_pg_num, u32 new_pg_num,
bool old_sort_bitwise, bool old_sort_bitwise,
bool new_sort_bitwise, bool new_sort_bitwise,
bool old_recovery_deletes,
bool new_recovery_deletes,
const struct ceph_pg *pgid); const struct ceph_pg *pgid);
bool ceph_osds_changed(const struct ceph_osds *old_acting, bool ceph_osds_changed(const struct ceph_osds *old_acting,
const struct ceph_osds *new_acting, const struct ceph_osds *new_acting,
......
...@@ -158,6 +158,10 @@ extern const char *ceph_osd_state_name(int s); ...@@ -158,6 +158,10 @@ extern const char *ceph_osd_state_name(int s);
#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */ #define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */
#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */ #define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */
#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */ #define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */
#define CEPH_OSDMAP_REQUIRE_JEWEL (1<<16) /* require jewel for booting osds */
#define CEPH_OSDMAP_REQUIRE_KRAKEN (1<<17) /* require kraken for booting osds */
#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */
#define CEPH_OSDMAP_RECOVERY_DELETES (1<<19) /* deletes performed during recovery instead of peering */
/* /*
* The error code to return when an OSD can't handle a write * The error code to return when an OSD can't handle a write
......
...@@ -193,7 +193,7 @@ struct crush_choose_arg { ...@@ -193,7 +193,7 @@ struct crush_choose_arg {
struct crush_choose_arg_map { struct crush_choose_arg_map {
#ifdef __KERNEL__ #ifdef __KERNEL__
struct rb_node node; struct rb_node node;
u64 choose_args_index; s64 choose_args_index;
#endif #endif
struct crush_choose_arg *args; /*!< replacement for each bucket struct crush_choose_arg *args; /*!< replacement for each bucket
in the crushmap */ in the crushmap */
......
...@@ -306,7 +306,7 @@ static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket, ...@@ -306,7 +306,7 @@ static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket,
const struct crush_choose_arg *arg, const struct crush_choose_arg *arg,
int position) int position)
{ {
if (!arg || !arg->weight_set || arg->weight_set_size == 0) if (!arg || !arg->weight_set)
return bucket->item_weights; return bucket->item_weights;
if (position >= arg->weight_set_size) if (position >= arg->weight_set_size)
......
...@@ -1287,10 +1287,10 @@ static void prepare_write_message(struct ceph_connection *con) ...@@ -1287,10 +1287,10 @@ static void prepare_write_message(struct ceph_connection *con)
if (m->needs_out_seq) { if (m->needs_out_seq) {
m->hdr.seq = cpu_to_le64(++con->out_seq); m->hdr.seq = cpu_to_le64(++con->out_seq);
m->needs_out_seq = false; m->needs_out_seq = false;
}
if (con->ops->reencode_message) if (con->ops->reencode_message)
con->ops->reencode_message(m); con->ops->reencode_message(m);
}
dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
m, con->out_seq, le16_to_cpu(m->hdr.type), m, con->out_seq, le16_to_cpu(m->hdr.type),
......
...@@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, ...@@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
bool legacy_change; bool legacy_change;
bool split = false; bool split = false;
bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
bool recovery_deletes = ceph_osdmap_flag(osdc,
CEPH_OSDMAP_RECOVERY_DELETES);
enum calc_target_result ct_res; enum calc_target_result ct_res;
int ret; int ret;
...@@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, ...@@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
pi->pg_num, pi->pg_num,
t->sort_bitwise, t->sort_bitwise,
sort_bitwise, sort_bitwise,
t->recovery_deletes,
recovery_deletes,
&last_pgid)) &last_pgid))
force_resend = true; force_resend = true;
...@@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, ...@@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
t->pg_num = pi->pg_num; t->pg_num = pi->pg_num;
t->pg_num_mask = pi->pg_num_mask; t->pg_num_mask = pi->pg_num_mask;
t->sort_bitwise = sort_bitwise; t->sort_bitwise = sort_bitwise;
t->recovery_deletes = recovery_deletes;
t->osd = acting.primary; t->osd = acting.primary;
} }
...@@ -1918,10 +1923,12 @@ static void encode_request_partial(struct ceph_osd_request *req, ...@@ -1918,10 +1923,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
} }
ceph_encode_32(&p, req->r_attempts); /* retry_attempt */ ceph_encode_32(&p, req->r_attempts); /* retry_attempt */
BUG_ON(p != end - 8); /* space for features */ BUG_ON(p > end - 8); /* space for features */
msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */ msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */
/* front_len is finalized in encode_request_finish() */ /* front_len is finalized in encode_request_finish() */
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
msg->hdr.data_len = cpu_to_le32(data_len); msg->hdr.data_len = cpu_to_le32(data_len);
/* /*
* The header "data_off" is a hint to the receiver allowing it * The header "data_off" is a hint to the receiver allowing it
...@@ -1937,11 +1944,12 @@ static void encode_request_partial(struct ceph_osd_request *req, ...@@ -1937,11 +1944,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
static void encode_request_finish(struct ceph_msg *msg) static void encode_request_finish(struct ceph_msg *msg)
{ {
void *p = msg->front.iov_base; void *p = msg->front.iov_base;
void *const partial_end = p + msg->front.iov_len;
void *const end = p + msg->front_alloc_len; void *const end = p + msg->front_alloc_len;
if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) { if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) {
/* luminous OSD -- encode features and be done */ /* luminous OSD -- encode features and be done */
p = end - 8; p = partial_end;
ceph_encode_64(&p, msg->con->peer_features); ceph_encode_64(&p, msg->con->peer_features);
} else { } else {
struct { struct {
...@@ -1984,7 +1992,7 @@ static void encode_request_finish(struct ceph_msg *msg) ...@@ -1984,7 +1992,7 @@ static void encode_request_finish(struct ceph_msg *msg)
oid_len = p - oid; oid_len = p - oid;
tail = p; tail = p;
tail_len = (end - p) - 8; tail_len = partial_end - p;
p = msg->front.iov_base; p = msg->front.iov_base;
ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc)); ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc));
......
...@@ -295,6 +295,10 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c) ...@@ -295,6 +295,10 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c)
ret = decode_choose_arg(p, end, arg); ret = decode_choose_arg(p, end, arg);
if (ret) if (ret)
goto fail; goto fail;
if (arg->ids_size &&
arg->ids_size != c->buckets[bucket_index]->size)
goto e_inval;
} }
insert_choose_arg_map(&c->choose_args, arg_map); insert_choose_arg_map(&c->choose_args, arg_map);
...@@ -2078,6 +2082,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, ...@@ -2078,6 +2082,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
u32 new_pg_num, u32 new_pg_num,
bool old_sort_bitwise, bool old_sort_bitwise,
bool new_sort_bitwise, bool new_sort_bitwise,
bool old_recovery_deletes,
bool new_recovery_deletes,
const struct ceph_pg *pgid) const struct ceph_pg *pgid)
{ {
return !osds_equal(old_acting, new_acting) || return !osds_equal(old_acting, new_acting) ||
...@@ -2085,7 +2091,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, ...@@ -2085,7 +2091,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
old_size != new_size || old_size != new_size ||
old_min_size != new_min_size || old_min_size != new_min_size ||
ceph_pg_is_split(pgid, old_pg_num, new_pg_num) || ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
old_sort_bitwise != new_sort_bitwise; old_sort_bitwise != new_sort_bitwise ||
old_recovery_deletes != new_recovery_deletes;
} }
static int calc_pg_rank(int osd, const struct ceph_osds *acting) static int calc_pg_rank(int osd, const struct ceph_osds *acting)
...@@ -2301,10 +2308,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi, ...@@ -2301,10 +2308,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
} }
} }
/*
* Magic value used for a "default" fallback choose_args, used if the
* crush_choose_arg_map passed to do_crush() does not exist. If this
* also doesn't exist, fall back to canonical weights.
*/
#define CEPH_DEFAULT_CHOOSE_ARGS -1
static int do_crush(struct ceph_osdmap *map, int ruleno, int x, static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
int *result, int result_max, int *result, int result_max,
const __u32 *weight, int weight_max, const __u32 *weight, int weight_max,
u64 choose_args_index) s64 choose_args_index)
{ {
struct crush_choose_arg_map *arg_map; struct crush_choose_arg_map *arg_map;
int r; int r;
...@@ -2313,6 +2327,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, ...@@ -2313,6 +2327,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
arg_map = lookup_choose_arg_map(&map->crush->choose_args, arg_map = lookup_choose_arg_map(&map->crush->choose_args,
choose_args_index); choose_args_index);
if (!arg_map)
arg_map = lookup_choose_arg_map(&map->crush->choose_args,
CEPH_DEFAULT_CHOOSE_ARGS);
mutex_lock(&map->crush_workspace_mutex); mutex_lock(&map->crush_workspace_mutex);
r = crush_do_rule(map->crush, ruleno, x, result, result_max, r = crush_do_rule(map->crush, ruleno, x, result, result_max,
...@@ -2423,40 +2440,23 @@ static void apply_upmap(struct ceph_osdmap *osdmap, ...@@ -2423,40 +2440,23 @@ static void apply_upmap(struct ceph_osdmap *osdmap,
for (i = 0; i < pg->pg_upmap.len; i++) for (i = 0; i < pg->pg_upmap.len; i++)
raw->osds[i] = pg->pg_upmap.osds[i]; raw->osds[i] = pg->pg_upmap.osds[i];
raw->size = pg->pg_upmap.len; raw->size = pg->pg_upmap.len;
return; /* check and apply pg_upmap_items, if any */
} }
pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid); pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
if (pg) { if (pg) {
/* for (i = 0; i < raw->size; i++) {
* Note: this approach does not allow a bidirectional swap, for (j = 0; j < pg->pg_upmap_items.len; j++) {
* e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1]. int from = pg->pg_upmap_items.from_to[j][0];
*/ int to = pg->pg_upmap_items.from_to[j][1];
for (i = 0; i < pg->pg_upmap_items.len; i++) {
int from = pg->pg_upmap_items.from_to[i][0]; if (from == raw->osds[i]) {
int to = pg->pg_upmap_items.from_to[i][1]; if (!(to != CRUSH_ITEM_NONE &&
int pos = -1; to < osdmap->max_osd &&
bool exists = false; osdmap->osd_weight[to] == 0))
raw->osds[i] = to;
/* make sure replacement doesn't already appear */
for (j = 0; j < raw->size; j++) {
int osd = raw->osds[j];
if (osd == to) {
exists = true;
break; break;
} }
/* ignore mapping if target is marked out */
if (osd == from && pos < 0 &&
!(to != CRUSH_ITEM_NONE &&
to < osdmap->max_osd &&
osdmap->osd_weight[to] == 0)) {
pos = j;
}
}
if (!exists && pos >= 0) {
raw->osds[pos] = to;
return;
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment