Commit 57bb5595 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (23 commits)
  ceph: fix cap flush race reentrancy
  libceph: subscribe to osdmap when cluster is full
  libceph: handle new osdmap down/state change encoding
  rbd: handle online resize of underlying rbd image
  ceph: avoid inode lookup on nfs fh reconnect
  ceph: use LOOKUPINO to make unconnected nfs fh more reliable
  rbd: use snprintf for disk->disk_name
  rbd: cleanup: make kfree match kmalloc
  rbd: warn on update_snaps failure on notify
  ceph: check return value for start_request in writepages
  ceph: remove useless check
  libceph: add missing breaks in addr_set_port
  libceph: fix TAG_WAIT case
  ceph: fix broken comparison in readdir loop
  libceph: fix osdmap timestamp assignment
  ceph: fix rare potential cap leak
  libceph: use snprintf for unknown addrs
  libceph: use snprintf for formatting object name
  ceph: use snprintf for dirstat content
  libceph: fix uninitialized value when no get_authorizer method is set
  ...
parents 2a651c7f db354052
...@@ -1191,14 +1191,19 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev, ...@@ -1191,14 +1191,19 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev,
static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{ {
struct rbd_device *dev = (struct rbd_device *)data; struct rbd_device *dev = (struct rbd_device *)data;
int rc;
if (!dev) if (!dev)
return; return;
dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
notify_id, (int)opcode); notify_id, (int)opcode);
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
__rbd_update_snaps(dev); rc = __rbd_update_snaps(dev);
mutex_unlock(&ctl_mutex); mutex_unlock(&ctl_mutex);
if (rc)
pr_warning(DRV_NAME "%d got notification but failed to update"
" snaps: %d\n", dev->major, rc);
rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name);
} }
...@@ -1597,7 +1602,7 @@ static int rbd_header_add_snap(struct rbd_device *dev, ...@@ -1597,7 +1602,7 @@ static int rbd_header_add_snap(struct rbd_device *dev,
int name_len = strlen(snap_name); int name_len = strlen(snap_name);
u64 new_snapid; u64 new_snapid;
int ret; int ret;
void *data, *data_start, *data_end; void *data, *p, *e;
u64 ver; u64 ver;
/* we should create a snapshot only if we're pointing at the head */ /* we should create a snapshot only if we're pointing at the head */
...@@ -1614,16 +1619,16 @@ static int rbd_header_add_snap(struct rbd_device *dev, ...@@ -1614,16 +1619,16 @@ static int rbd_header_add_snap(struct rbd_device *dev,
if (!data) if (!data)
return -ENOMEM; return -ENOMEM;
data_start = data; p = data;
data_end = data + name_len + 16; e = data + name_len + 16;
ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad); ceph_encode_string_safe(&p, e, snap_name, name_len, bad);
ceph_encode_64_safe(&data, data_end, new_snapid, bad); ceph_encode_64_safe(&p, e, new_snapid, bad);
ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
data_start, data - data_start, &ver); data, p - data, &ver);
kfree(data_start); kfree(data);
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -1659,6 +1664,9 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev) ...@@ -1659,6 +1664,9 @@ static int __rbd_update_snaps(struct rbd_device *rbd_dev)
if (ret < 0) if (ret < 0)
return ret; return ret;
/* resized? */
set_capacity(rbd_dev->disk, h.image_size / 512ULL);
down_write(&rbd_dev->header.snap_rwsem); down_write(&rbd_dev->header.snap_rwsem);
snap_seq = rbd_dev->header.snapc->seq; snap_seq = rbd_dev->header.snapc->seq;
...@@ -1716,7 +1724,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) ...@@ -1716,7 +1724,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
if (!disk) if (!disk)
goto out; goto out;
sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id); snprintf(disk->disk_name, sizeof(disk->disk_name), DRV_NAME "%d",
rbd_dev->id);
disk->major = rbd_dev->major; disk->major = rbd_dev->major;
disk->first_minor = 0; disk->first_minor = 0;
disk->fops = &rbd_bd_ops; disk->fops = &rbd_bd_ops;
......
...@@ -848,7 +848,8 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -848,7 +848,8 @@ static int ceph_writepages_start(struct address_space *mapping,
op->payload_len = cpu_to_le32(len); op->payload_len = cpu_to_le32(len);
req->r_request->hdr.data_len = cpu_to_le32(len); req->r_request->hdr.data_len = cpu_to_le32(len);
ceph_osdc_start_request(&fsc->client->osdc, req, true); rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
BUG_ON(rc);
req = NULL; req = NULL;
/* continue? */ /* continue? */
...@@ -880,8 +881,6 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -880,8 +881,6 @@ static int ceph_writepages_start(struct address_space *mapping,
out: out:
if (req) if (req)
ceph_osdc_put_request(req); ceph_osdc_put_request(req);
if (rc > 0)
rc = 0; /* vfs expects us to return 0 */
ceph_put_snap_context(snapc); ceph_put_snap_context(snapc);
dout("writepages done, rc = %d\n", rc); dout("writepages done, rc = %d\n", rc);
return rc; return rc;
......
...@@ -569,7 +569,8 @@ int ceph_add_cap(struct inode *inode, ...@@ -569,7 +569,8 @@ int ceph_add_cap(struct inode *inode,
list_add_tail(&cap->session_caps, &session->s_caps); list_add_tail(&cap->session_caps, &session->s_caps);
session->s_nr_caps++; session->s_nr_caps++;
spin_unlock(&session->s_cap_lock); spin_unlock(&session->s_cap_lock);
} } else if (new_cap)
ceph_put_cap(mdsc, new_cap);
if (!ci->i_snap_realm) { if (!ci->i_snap_realm) {
/* /*
...@@ -2634,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, ...@@ -2634,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
struct ceph_mds_session *session, struct ceph_mds_session *session,
int *open_target_sessions) int *open_target_sessions)
{ {
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds; int mds = session->s_mds;
unsigned mseq = le32_to_cpu(ex->migrate_seq); unsigned mseq = le32_to_cpu(ex->migrate_seq);
...@@ -2670,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, ...@@ -2670,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
* export targets, so that we get the matching IMPORT * export targets, so that we get the matching IMPORT
*/ */
*open_target_sessions = 1; *open_target_sessions = 1;
/*
* we can't flush dirty caps that we've seen the
* EXPORT but no IMPORT for
*/
spin_lock(&mdsc->cap_dirty_lock);
if (!list_empty(&ci->i_dirty_item)) {
dout(" moving %p to cap_dirty_migrating\n",
inode);
list_move(&ci->i_dirty_item,
&mdsc->cap_dirty_migrating);
}
spin_unlock(&mdsc->cap_dirty_lock);
} }
__ceph_remove_cap(cap); __ceph_remove_cap(cap);
} }
...@@ -2707,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, ...@@ -2707,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
ci->i_cap_exporting_issued = 0; ci->i_cap_exporting_issued = 0;
ci->i_cap_exporting_mseq = 0; ci->i_cap_exporting_mseq = 0;
ci->i_cap_exporting_mds = -1; ci->i_cap_exporting_mds = -1;
spin_lock(&mdsc->cap_dirty_lock);
if (!list_empty(&ci->i_dirty_item)) {
dout(" moving %p back to cap_dirty\n", inode);
list_move(&ci->i_dirty_item, &mdsc->cap_dirty);
}
spin_unlock(&mdsc->cap_dirty_lock);
} else { } else {
dout("handle_cap_import inode %p ci %p mds%d mseq %d\n", dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",
inode, ci, mds, mseq); inode, ci, mds, mseq);
...@@ -2910,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) ...@@ -2910,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
*/ */
void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
{ {
struct ceph_inode_info *ci, *nci = NULL; struct ceph_inode_info *ci;
struct inode *inode, *ninode = NULL; struct inode *inode;
struct list_head *p, *n;
dout("flush_dirty_caps\n"); dout("flush_dirty_caps\n");
spin_lock(&mdsc->cap_dirty_lock); spin_lock(&mdsc->cap_dirty_lock);
list_for_each_safe(p, n, &mdsc->cap_dirty) { while (!list_empty(&mdsc->cap_dirty)) {
if (nci) { ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
ci = nci; i_dirty_item);
inode = ninode; inode = igrab(&ci->vfs_inode);
ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; dout("flush_dirty_caps %p\n", inode);
dout("flush_dirty_caps inode %p (was next inode)\n",
inode);
} else {
ci = list_entry(p, struct ceph_inode_info,
i_dirty_item);
inode = igrab(&ci->vfs_inode);
BUG_ON(!inode);
dout("flush_dirty_caps inode %p\n", inode);
}
if (n != &mdsc->cap_dirty) {
nci = list_entry(n, struct ceph_inode_info,
i_dirty_item);
ninode = igrab(&nci->vfs_inode);
BUG_ON(!ninode);
nci->i_ceph_flags |= CEPH_I_NOFLUSH;
dout("flush_dirty_caps next inode %p, noflush\n",
ninode);
} else {
nci = NULL;
ninode = NULL;
}
spin_unlock(&mdsc->cap_dirty_lock); spin_unlock(&mdsc->cap_dirty_lock);
if (inode) { if (inode) {
ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
...@@ -2951,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) ...@@ -2951,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
spin_lock(&mdsc->cap_dirty_lock); spin_lock(&mdsc->cap_dirty_lock);
} }
spin_unlock(&mdsc->cap_dirty_lock); spin_unlock(&mdsc->cap_dirty_lock);
dout("flush_dirty_caps done\n");
} }
/* /*
......
...@@ -360,7 +360,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) ...@@ -360,7 +360,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
rinfo = &fi->last_readdir->r_reply_info; rinfo = &fi->last_readdir->r_reply_info;
dout("readdir frag %x num %d off %d chunkoff %d\n", frag, dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
rinfo->dir_nr, off, fi->offset); rinfo->dir_nr, off, fi->offset);
while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) { while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
u64 pos = ceph_make_fpos(frag, off); u64 pos = ceph_make_fpos(frag, off);
struct ceph_mds_reply_inode *in = struct ceph_mds_reply_inode *in =
rinfo->dir_in[off - fi->offset].in; rinfo->dir_in[off - fi->offset].in;
...@@ -1066,16 +1066,17 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, ...@@ -1066,16 +1066,17 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
struct inode *inode = file->f_dentry->d_inode; struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
int left; int left;
const int bufsize = 1024;
if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
return -EISDIR; return -EISDIR;
if (!cf->dir_info) { if (!cf->dir_info) {
cf->dir_info = kmalloc(1024, GFP_NOFS); cf->dir_info = kmalloc(bufsize, GFP_NOFS);
if (!cf->dir_info) if (!cf->dir_info)
return -ENOMEM; return -ENOMEM;
cf->dir_info_len = cf->dir_info_len =
sprintf(cf->dir_info, snprintf(cf->dir_info, bufsize,
"entries: %20lld\n" "entries: %20lld\n"
" files: %20lld\n" " files: %20lld\n"
" subdirs: %20lld\n" " subdirs: %20lld\n"
......
...@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, ...@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
static struct dentry *__fh_to_dentry(struct super_block *sb, static struct dentry *__fh_to_dentry(struct super_block *sb,
struct ceph_nfs_fh *fh) struct ceph_nfs_fh *fh)
{ {
struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
struct inode *inode; struct inode *inode;
struct dentry *dentry; struct dentry *dentry;
struct ceph_vino vino; struct ceph_vino vino;
...@@ -95,8 +96,24 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, ...@@ -95,8 +96,24 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
vino.ino = fh->ino; vino.ino = fh->ino;
vino.snap = CEPH_NOSNAP; vino.snap = CEPH_NOSNAP;
inode = ceph_find_inode(sb, vino); inode = ceph_find_inode(sb, vino);
if (!inode) if (!inode) {
return ERR_PTR(-ESTALE); struct ceph_mds_request *req;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
USE_ANY_MDS);
if (IS_ERR(req))
return ERR_CAST(req);
req->r_ino1 = vino;
req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
inode = req->r_target_inode;
if (inode)
igrab(inode);
ceph_mdsc_put_request(req);
if (!inode)
return ERR_PTR(-ESTALE);
}
dentry = d_obtain_alias(inode); dentry = d_obtain_alias(inode);
if (IS_ERR(dentry)) { if (IS_ERR(dentry)) {
...@@ -148,8 +165,10 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, ...@@ -148,8 +165,10 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash); snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
req->r_num_caps = 1; req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req); err = ceph_mdsc_do_request(mdsc, NULL, req);
inode = req->r_target_inode;
if (inode)
igrab(inode);
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
inode = ceph_find_inode(sb, vino);
if (!inode) if (!inode)
return ERR_PTR(err ? err : -ESTALE); return ERR_PTR(err ? err : -ESTALE);
} }
......
...@@ -578,6 +578,7 @@ static void __register_request(struct ceph_mds_client *mdsc, ...@@ -578,6 +578,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
if (dir) { if (dir) {
struct ceph_inode_info *ci = ceph_inode(dir); struct ceph_inode_info *ci = ceph_inode(dir);
ihold(dir);
spin_lock(&ci->i_unsafe_lock); spin_lock(&ci->i_unsafe_lock);
req->r_unsafe_dir = dir; req->r_unsafe_dir = dir;
list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops); list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
...@@ -598,6 +599,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc, ...@@ -598,6 +599,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
spin_lock(&ci->i_unsafe_lock); spin_lock(&ci->i_unsafe_lock);
list_del_init(&req->r_unsafe_dir_item); list_del_init(&req->r_unsafe_dir_item);
spin_unlock(&ci->i_unsafe_lock); spin_unlock(&ci->i_unsafe_lock);
iput(req->r_unsafe_dir);
req->r_unsafe_dir = NULL;
} }
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
...@@ -2691,7 +2695,6 @@ static void handle_lease(struct ceph_mds_client *mdsc, ...@@ -2691,7 +2695,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
{ {
struct super_block *sb = mdsc->fsc->sb; struct super_block *sb = mdsc->fsc->sb;
struct inode *inode; struct inode *inode;
struct ceph_inode_info *ci;
struct dentry *parent, *dentry; struct dentry *parent, *dentry;
struct ceph_dentry_info *di; struct ceph_dentry_info *di;
int mds = session->s_mds; int mds = session->s_mds;
...@@ -2728,7 +2731,6 @@ static void handle_lease(struct ceph_mds_client *mdsc, ...@@ -2728,7 +2731,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
dout("handle_lease no inode %llx\n", vino.ino); dout("handle_lease no inode %llx\n", vino.ino);
goto release; goto release;
} }
ci = ceph_inode(inode);
/* dentry */ /* dentry */
parent = d_find_alias(inode); parent = d_find_alias(inode);
...@@ -3002,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) ...@@ -3002,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
spin_lock_init(&mdsc->snap_flush_lock); spin_lock_init(&mdsc->snap_flush_lock);
mdsc->cap_flush_seq = 0; mdsc->cap_flush_seq = 0;
INIT_LIST_HEAD(&mdsc->cap_dirty); INIT_LIST_HEAD(&mdsc->cap_dirty);
INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
mdsc->num_cap_flushing = 0; mdsc->num_cap_flushing = 0;
spin_lock_init(&mdsc->cap_dirty_lock); spin_lock_init(&mdsc->cap_dirty_lock);
init_waitqueue_head(&mdsc->cap_flushing_wq); init_waitqueue_head(&mdsc->cap_flushing_wq);
......
...@@ -278,6 +278,7 @@ struct ceph_mds_client { ...@@ -278,6 +278,7 @@ struct ceph_mds_client {
u64 cap_flush_seq; u64 cap_flush_seq;
struct list_head cap_dirty; /* inodes with dirty caps */ struct list_head cap_dirty; /* inodes with dirty caps */
struct list_head cap_dirty_migrating; /* ...that are migration... */
int num_cap_flushing; /* # caps we are flushing */ int num_cap_flushing; /* # caps we are flushing */
spinlock_t cap_dirty_lock; /* protects above items */ spinlock_t cap_dirty_lock; /* protects above items */
wait_queue_head_t cap_flushing_wq; wait_queue_head_t cap_flushing_wq;
......
...@@ -313,6 +313,7 @@ enum { ...@@ -313,6 +313,7 @@ enum {
CEPH_MDS_OP_GETATTR = 0x00101, CEPH_MDS_OP_GETATTR = 0x00101,
CEPH_MDS_OP_LOOKUPHASH = 0x00102, CEPH_MDS_OP_LOOKUPHASH = 0x00102,
CEPH_MDS_OP_LOOKUPPARENT = 0x00103, CEPH_MDS_OP_LOOKUPPARENT = 0x00103,
CEPH_MDS_OP_LOOKUPINO = 0x00104,
CEPH_MDS_OP_SETXATTR = 0x01105, CEPH_MDS_OP_SETXATTR = 0x01105,
CEPH_MDS_OP_RMXATTR = 0x01106, CEPH_MDS_OP_RMXATTR = 0x01106,
......
...@@ -76,7 +76,8 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss) ...@@ -76,7 +76,8 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss)
break; break;
default: default:
sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family); snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)",
(int)ss->ss_family);
} }
return s; return s;
...@@ -598,7 +599,7 @@ static void prepare_write_keepalive(struct ceph_connection *con) ...@@ -598,7 +599,7 @@ static void prepare_write_keepalive(struct ceph_connection *con)
* Connection negotiation. * Connection negotiation.
*/ */
static void prepare_connect_authorizer(struct ceph_connection *con) static int prepare_connect_authorizer(struct ceph_connection *con)
{ {
void *auth_buf; void *auth_buf;
int auth_len = 0; int auth_len = 0;
...@@ -612,13 +613,20 @@ static void prepare_connect_authorizer(struct ceph_connection *con) ...@@ -612,13 +613,20 @@ static void prepare_connect_authorizer(struct ceph_connection *con)
con->auth_retry); con->auth_retry);
mutex_lock(&con->mutex); mutex_lock(&con->mutex);
if (test_bit(CLOSED, &con->state) ||
test_bit(OPENING, &con->state))
return -EAGAIN;
con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
con->out_connect.authorizer_len = cpu_to_le32(auth_len); con->out_connect.authorizer_len = cpu_to_le32(auth_len);
con->out_kvec[con->out_kvec_left].iov_base = auth_buf; if (auth_len) {
con->out_kvec[con->out_kvec_left].iov_len = auth_len; con->out_kvec[con->out_kvec_left].iov_base = auth_buf;
con->out_kvec_left++; con->out_kvec[con->out_kvec_left].iov_len = auth_len;
con->out_kvec_bytes += auth_len; con->out_kvec_left++;
con->out_kvec_bytes += auth_len;
}
return 0;
} }
/* /*
...@@ -640,9 +648,9 @@ static void prepare_write_banner(struct ceph_messenger *msgr, ...@@ -640,9 +648,9 @@ static void prepare_write_banner(struct ceph_messenger *msgr,
set_bit(WRITE_PENDING, &con->state); set_bit(WRITE_PENDING, &con->state);
} }
static void prepare_write_connect(struct ceph_messenger *msgr, static int prepare_write_connect(struct ceph_messenger *msgr,
struct ceph_connection *con, struct ceph_connection *con,
int after_banner) int after_banner)
{ {
unsigned global_seq = get_global_seq(con->msgr, 0); unsigned global_seq = get_global_seq(con->msgr, 0);
int proto; int proto;
...@@ -683,7 +691,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, ...@@ -683,7 +691,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
con->out_more = 0; con->out_more = 0;
set_bit(WRITE_PENDING, &con->state); set_bit(WRITE_PENDING, &con->state);
prepare_connect_authorizer(con); return prepare_connect_authorizer(con);
} }
...@@ -1065,8 +1073,10 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) ...@@ -1065,8 +1073,10 @@ static void addr_set_port(struct sockaddr_storage *ss, int p)
switch (ss->ss_family) { switch (ss->ss_family) {
case AF_INET: case AF_INET:
((struct sockaddr_in *)ss)->sin_port = htons(p); ((struct sockaddr_in *)ss)->sin_port = htons(p);
break;
case AF_INET6: case AF_INET6:
((struct sockaddr_in6 *)ss)->sin6_port = htons(p); ((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
break;
} }
} }
...@@ -1216,6 +1226,7 @@ static int process_connect(struct ceph_connection *con) ...@@ -1216,6 +1226,7 @@ static int process_connect(struct ceph_connection *con)
u64 sup_feat = con->msgr->supported_features; u64 sup_feat = con->msgr->supported_features;
u64 req_feat = con->msgr->required_features; u64 req_feat = con->msgr->required_features;
u64 server_feat = le64_to_cpu(con->in_reply.features); u64 server_feat = le64_to_cpu(con->in_reply.features);
int ret;
dout("process_connect on %p tag %d\n", con, (int)con->in_tag); dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
...@@ -1250,7 +1261,9 @@ static int process_connect(struct ceph_connection *con) ...@@ -1250,7 +1261,9 @@ static int process_connect(struct ceph_connection *con)
return -1; return -1;
} }
con->auth_retry = 1; con->auth_retry = 1;
prepare_write_connect(con->msgr, con, 0); ret = prepare_write_connect(con->msgr, con, 0);
if (ret < 0)
return ret;
prepare_read_connect(con); prepare_read_connect(con);
break; break;
...@@ -1277,6 +1290,9 @@ static int process_connect(struct ceph_connection *con) ...@@ -1277,6 +1290,9 @@ static int process_connect(struct ceph_connection *con)
if (con->ops->peer_reset) if (con->ops->peer_reset)
con->ops->peer_reset(con); con->ops->peer_reset(con);
mutex_lock(&con->mutex); mutex_lock(&con->mutex);
if (test_bit(CLOSED, &con->state) ||
test_bit(OPENING, &con->state))
return -EAGAIN;
break; break;
case CEPH_MSGR_TAG_RETRY_SESSION: case CEPH_MSGR_TAG_RETRY_SESSION:
...@@ -1341,7 +1357,9 @@ static int process_connect(struct ceph_connection *con) ...@@ -1341,7 +1357,9 @@ static int process_connect(struct ceph_connection *con)
* to WAIT. This shouldn't happen if we are the * to WAIT. This shouldn't happen if we are the
* client. * client.
*/ */
pr_err("process_connect peer connecting WAIT\n"); pr_err("process_connect got WAIT as client\n");
con->error_msg = "protocol error, got WAIT as client";
return -1;
default: default:
pr_err("connect protocol error, will retry\n"); pr_err("connect protocol error, will retry\n");
...@@ -1810,6 +1828,17 @@ static int try_read(struct ceph_connection *con) ...@@ -1810,6 +1828,17 @@ static int try_read(struct ceph_connection *con)
more: more:
dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
con->in_base_pos); con->in_base_pos);
/*
* process_connect and process_message drop and re-take
* con->mutex. make sure we handle a racing close or reopen.
*/
if (test_bit(CLOSED, &con->state) ||
test_bit(OPENING, &con->state)) {
ret = -EAGAIN;
goto out;
}
if (test_bit(CONNECTING, &con->state)) { if (test_bit(CONNECTING, &con->state)) {
if (!test_bit(NEGOTIATING, &con->state)) { if (!test_bit(NEGOTIATING, &con->state)) {
dout("try_read connecting\n"); dout("try_read connecting\n");
...@@ -1938,8 +1967,10 @@ static void con_work(struct work_struct *work) ...@@ -1938,8 +1967,10 @@ static void con_work(struct work_struct *work)
{ {
struct ceph_connection *con = container_of(work, struct ceph_connection, struct ceph_connection *con = container_of(work, struct ceph_connection,
work.work); work.work);
int ret;
mutex_lock(&con->mutex); mutex_lock(&con->mutex);
restart:
if (test_and_clear_bit(BACKOFF, &con->state)) { if (test_and_clear_bit(BACKOFF, &con->state)) {
dout("con_work %p backing off\n", con); dout("con_work %p backing off\n", con);
if (queue_delayed_work(ceph_msgr_wq, &con->work, if (queue_delayed_work(ceph_msgr_wq, &con->work,
...@@ -1969,18 +2000,31 @@ static void con_work(struct work_struct *work) ...@@ -1969,18 +2000,31 @@ static void con_work(struct work_struct *work)
con_close_socket(con); con_close_socket(con);
} }
if (test_and_clear_bit(SOCK_CLOSED, &con->state) || if (test_and_clear_bit(SOCK_CLOSED, &con->state))
try_read(con) < 0 || goto fault;
try_write(con) < 0) {
mutex_unlock(&con->mutex); ret = try_read(con);
ceph_fault(con); /* error/fault path */ if (ret == -EAGAIN)
goto done_unlocked; goto restart;
} if (ret < 0)
goto fault;
ret = try_write(con);
if (ret == -EAGAIN)
goto restart;
if (ret < 0)
goto fault;
done: done:
mutex_unlock(&con->mutex); mutex_unlock(&con->mutex);
done_unlocked: done_unlocked:
con->ops->put(con); con->ops->put(con);
return;
fault:
mutex_unlock(&con->mutex);
ceph_fault(con); /* error/fault path */
goto done_unlocked;
} }
......
...@@ -124,7 +124,7 @@ static void calc_layout(struct ceph_osd_client *osdc, ...@@ -124,7 +124,7 @@ static void calc_layout(struct ceph_osd_client *osdc,
ceph_calc_raw_layout(osdc, layout, vino.snap, off, ceph_calc_raw_layout(osdc, layout, vino.snap, off,
plen, &bno, req, op); plen, &bno, req, op);
sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
req->r_oid_len = strlen(req->r_oid); req->r_oid_len = strlen(req->r_oid);
} }
...@@ -1421,6 +1421,15 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) ...@@ -1421,6 +1421,15 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
done: done:
downgrade_write(&osdc->map_sem); downgrade_write(&osdc->map_sem);
ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
/*
* subscribe to subsequent osdmap updates if full to ensure
* we find out when we are no longer full and stop returning
* ENOSPC.
*/
if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
ceph_monc_request_next_osdmap(&osdc->client->monc);
send_queued(osdc); send_queued(osdc);
up_read(&osdc->map_sem); up_read(&osdc->map_sem);
wake_up_all(&osdc->client->auth_wq); wake_up_all(&osdc->client->auth_wq);
...@@ -1677,8 +1686,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, ...@@ -1677,8 +1686,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
*/ */
if (req->r_sent == 0) { if (req->r_sent == 0) {
rc = __map_request(osdc, req); rc = __map_request(osdc, req);
if (rc < 0) if (rc < 0) {
if (nofail) {
dout("osdc_start_request failed map, "
" will retry %lld\n", req->r_tid);
rc = 0;
}
goto out_unlock; goto out_unlock;
}
if (req->r_osd == NULL) { if (req->r_osd == NULL) {
dout("send_request %p no up osds in pg\n", req); dout("send_request %p no up osds in pg\n", req);
ceph_monc_request_next_osdmap(&osdc->client->monc); ceph_monc_request_next_osdmap(&osdc->client->monc);
......
...@@ -765,7 +765,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -765,7 +765,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
} }
map->epoch++; map->epoch++;
map->modified = map->modified; map->modified = modified;
if (newcrush) { if (newcrush) {
if (map->crush) if (map->crush)
crush_destroy(map->crush); crush_destroy(map->crush);
...@@ -830,15 +830,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -830,15 +830,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
map->osd_addr[osd] = addr; map->osd_addr[osd] = addr;
} }
/* new_down */ /* new_state */
ceph_decode_32_safe(p, end, len, bad); ceph_decode_32_safe(p, end, len, bad);
while (len--) { while (len--) {
u32 osd; u32 osd;
u8 xorstate;
ceph_decode_32_safe(p, end, osd, bad); ceph_decode_32_safe(p, end, osd, bad);
xorstate = **(u8 **)p;
(*p)++; /* clean flag */ (*p)++; /* clean flag */
pr_info("osd%d down\n", osd); if (xorstate == 0)
xorstate = CEPH_OSD_UP;
if (xorstate & CEPH_OSD_UP)
pr_info("osd%d down\n", osd);
if (osd < map->max_osd) if (osd < map->max_osd)
map->osd_state[osd] &= ~CEPH_OSD_UP; map->osd_state[osd] ^= xorstate;
} }
/* new_weight */ /* new_weight */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment