Commit 45ee68db authored by Ilya Dryomov's avatar Ilya Dryomov Committed by Ben Hutchings

libceph: fix double __remove_osd() problem

commit 7eb71e03 upstream.

It turns out it's possible to get __remove_osd() called twice on the
same OSD.  That doesn't sit well with rb_erase() - depending on the
shape of the tree we can get a NULL dereference, a soft lockup or
a random crash at some point in the future as we end up touching freed
memory.  One scenario that I was able to reproduce is as follows:

            <osd3 is idle, on the osd lru list>
<con reset - osd3>
con_fault_finish()
  osd_reset()
                              <osdmap - osd3 down>
                              ceph_osdc_handle_map()
                                <takes map_sem>
                                kick_requests()
                                  <takes request_mutex>
                                  reset_changed_osds()
                                    __reset_osd()
                                      __remove_osd()
                                  <releases request_mutex>
                                <releases map_sem>
    <takes map_sem>
    <takes request_mutex>
    __kick_osd_requests()
      __reset_osd()
        __remove_osd() <-- !!!

A case can be made that osd refcounting is imperfect and reworking it
would be a proper resolution, but for now Sage and I decided to fix
this by adding a safe guard around __remove_osd().

Fixes: http://tracker.ceph.com/issues/8087

Cc: Sage Weil <sage@redhat.com>
Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
Reviewed-by: default avatarSage Weil <sage@redhat.com>
Reviewed-by: default avatarAlex Elder <elder@linaro.org>
[bwh: Backported to 3.2: adjust context]
Signed-off-by: default avatarBen Hutchings <ben@decadent.org.uk>
parent 766dde01
...@@ -678,12 +678,22 @@ static void put_osd(struct ceph_osd *osd) ...@@ -678,12 +678,22 @@ static void put_osd(struct ceph_osd *osd)
*/ */
static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
{ {
dout("__remove_osd %p\n", osd); dout("%s %p osd%d\n", __func__, osd, osd->o_osd);
BUG_ON(!list_empty(&osd->o_requests)); BUG_ON(!list_empty(&osd->o_requests));
rb_erase(&osd->o_node, &osdc->osds);
list_del_init(&osd->o_osd_lru); list_del_init(&osd->o_osd_lru);
ceph_con_close(&osd->o_con); rb_erase(&osd->o_node, &osdc->osds);
put_osd(osd); RB_CLEAR_NODE(&osd->o_node);
}
static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
{
dout("%s %p osd%d\n", __func__, osd, osd->o_osd);
if (!RB_EMPTY_NODE(&osd->o_node)) {
ceph_con_close(&osd->o_con);
__remove_osd(osdc, osd);
put_osd(osd);
}
} }
static void remove_all_osds(struct ceph_osd_client *osdc) static void remove_all_osds(struct ceph_osd_client *osdc)
...@@ -693,7 +703,7 @@ static void remove_all_osds(struct ceph_osd_client *osdc) ...@@ -693,7 +703,7 @@ static void remove_all_osds(struct ceph_osd_client *osdc)
while (!RB_EMPTY_ROOT(&osdc->osds)) { while (!RB_EMPTY_ROOT(&osdc->osds)) {
struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
struct ceph_osd, o_node); struct ceph_osd, o_node);
__remove_osd(osdc, osd); remove_osd(osdc, osd);
} }
mutex_unlock(&osdc->request_mutex); mutex_unlock(&osdc->request_mutex);
} }
...@@ -723,7 +733,7 @@ static void remove_old_osds(struct ceph_osd_client *osdc) ...@@ -723,7 +733,7 @@ static void remove_old_osds(struct ceph_osd_client *osdc)
list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) {
if (time_before(jiffies, osd->lru_ttl)) if (time_before(jiffies, osd->lru_ttl))
break; break;
__remove_osd(osdc, osd); remove_osd(osdc, osd);
} }
mutex_unlock(&osdc->request_mutex); mutex_unlock(&osdc->request_mutex);
} }
...@@ -739,7 +749,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) ...@@ -739,7 +749,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
dout("__reset_osd %p osd%d\n", osd, osd->o_osd); dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
if (list_empty(&osd->o_requests) && if (list_empty(&osd->o_requests) &&
list_empty(&osd->o_linger_requests)) { list_empty(&osd->o_linger_requests)) {
__remove_osd(osdc, osd); remove_osd(osdc, osd);
} else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
&osd->o_con.peer_addr, &osd->o_con.peer_addr,
sizeof(osd->o_con.peer_addr)) == 0 && sizeof(osd->o_con.peer_addr)) == 0 &&
...@@ -1271,6 +1281,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) ...@@ -1271,6 +1281,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
{ {
struct rb_node *p, *n; struct rb_node *p, *n;
dout("%s %p\n", __func__, osdc);
for (p = rb_first(&osdc->osds); p; p = n) { for (p = rb_first(&osdc->osds); p; p = n) {
struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node); struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment