Commit 8d2d441a authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull Ceph updates from Sage Weil:
 "There is a lot of refactoring and hardening of the libceph and rbd
  code here from Ilya that fix various smaller bugs, and a few more
  important fixes with clone overlap.  The main fix is a critical change
  to the request_fn handling to not sleep that was exposed by the recent
  mutex changes (which will also go to the 3.16 stable series).

  Yan Zheng has several fixes in here for CephFS fixing ACL handling,
  time stamps, and request resends when the MDS restarts.

  Finally, there are a few cleanups from Himangi Saraogi based on
  Coccinelle"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (39 commits)
  libceph: set last_piece in ceph_msg_data_pages_cursor_init() correctly
  rbd: remove extra newlines from rbd_warn() messages
  rbd: allocate img_request with GFP_NOIO instead GFP_ATOMIC
  rbd: rework rbd_request_fn()
  ceph: fix kick_requests()
  ceph: fix append mode write
  ceph: fix sizeof(struct tYpO *) typo
  ceph: remove redundant memset(0)
  rbd: take snap_id into account when reading in parent info
  rbd: do not read in parent info before snap context
  rbd: update mapping size only on refresh
  rbd: harden rbd_dev_refresh() and callers a bit
  rbd: split rbd_dev_spec_update() into two functions
  rbd: remove unnecessary asserts in rbd_dev_image_probe()
  rbd: introduce rbd_dev_header_info()
  rbd: show the entire chain of parent images
  ceph: replace comma with a semicolon
  rbd: use rbd_segment_name_free() instead of kfree()
  ceph: check zero length in ceph_sync_read()
  ceph: reset r_resend_mds after receiving -ESTALE
  ...
parents 89838b80 5f740d7e
...@@ -94,5 +94,5 @@ current_snap ...@@ -94,5 +94,5 @@ current_snap
parent (before this commit)
Information identifying the pool, image, and snapshot id for
the parent image in a layered rbd image (format 2 only).
parent (after this commit)
Information identifying the chain of parent images in a layered rbd
image. Entries are separated by empty lines.
This diff is collapsed.
...@@ -172,14 +172,24 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) ...@@ -172,14 +172,24 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
int ceph_init_acl(struct dentry *dentry, struct inode *inode, struct inode *dir) int ceph_init_acl(struct dentry *dentry, struct inode *inode, struct inode *dir)
{ {
struct posix_acl *default_acl, *acl; struct posix_acl *default_acl, *acl;
umode_t new_mode = inode->i_mode;
int error; int error;
error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); error = posix_acl_create(dir, &new_mode, &default_acl, &acl);
if (error) if (error)
return error; return error;
if (!default_acl && !acl) if (!default_acl && !acl) {
cache_no_acl(inode); cache_no_acl(inode);
if (new_mode != inode->i_mode) {
struct iattr newattrs = {
.ia_mode = new_mode,
.ia_valid = ATTR_MODE,
};
error = ceph_setattr(dentry, &newattrs);
}
return error;
}
if (default_acl) { if (default_acl) {
error = ceph_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); error = ceph_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
......
...@@ -3277,7 +3277,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode, ...@@ -3277,7 +3277,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
rel->ino = cpu_to_le64(ceph_ino(inode)); rel->ino = cpu_to_le64(ceph_ino(inode));
rel->cap_id = cpu_to_le64(cap->cap_id); rel->cap_id = cpu_to_le64(cap->cap_id);
rel->seq = cpu_to_le32(cap->seq); rel->seq = cpu_to_le32(cap->seq);
rel->issue_seq = cpu_to_le32(cap->issue_seq), rel->issue_seq = cpu_to_le32(cap->issue_seq);
rel->mseq = cpu_to_le32(cap->mseq); rel->mseq = cpu_to_le32(cap->mseq);
rel->caps = cpu_to_le32(cap->implemented); rel->caps = cpu_to_le32(cap->implemented);
rel->wanted = cpu_to_le32(cap->mds_wanted); rel->wanted = cpu_to_le32(cap->mds_wanted);
......
...@@ -423,6 +423,9 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, ...@@ -423,6 +423,9 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
dout("sync_read on file %p %llu~%u %s\n", file, off, dout("sync_read on file %p %llu~%u %s\n", file, off,
(unsigned)len, (unsigned)len,
(file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
if (!len)
return 0;
/* /*
* flush any page cache pages in this range. this * flush any page cache pages in this range. this
* will make concurrent normal and sync io slow, * will make concurrent normal and sync io slow,
...@@ -470,8 +473,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, ...@@ -470,8 +473,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
size_t left = ret; size_t left = ret;
while (left) { while (left) {
int copy = min_t(size_t, PAGE_SIZE, left); size_t page_off = off & ~PAGE_MASK;
l = copy_page_to_iter(pages[k++], 0, copy, i); size_t copy = min_t(size_t,
PAGE_SIZE - page_off, left);
l = copy_page_to_iter(pages[k++], page_off,
copy, i);
off += l; off += l;
left -= l; left -= l;
if (l < copy) if (l < copy)
...@@ -531,7 +537,7 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) ...@@ -531,7 +537,7 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
* objects, rollback on failure, etc.) * objects, rollback on failure, etc.)
*/ */
static ssize_t static ssize_t
ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from) ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
...@@ -547,7 +553,6 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -547,7 +553,6 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from)
int check_caps = 0; int check_caps = 0;
int ret; int ret;
struct timespec mtime = CURRENT_TIME; struct timespec mtime = CURRENT_TIME;
loff_t pos = iocb->ki_pos;
size_t count = iov_iter_count(from); size_t count = iov_iter_count(from);
if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
...@@ -646,7 +651,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -646,7 +651,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from)
* correct atomic write, we should e.g. take write locks on all * correct atomic write, we should e.g. take write locks on all
* objects, rollback on failure, etc.) * objects, rollback on failure, etc.)
*/ */
static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from) static ssize_t
ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
...@@ -663,7 +669,6 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -663,7 +669,6 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from)
int check_caps = 0; int check_caps = 0;
int ret; int ret;
struct timespec mtime = CURRENT_TIME; struct timespec mtime = CURRENT_TIME;
loff_t pos = iocb->ki_pos;
size_t count = iov_iter_count(from); size_t count = iov_iter_count(from);
if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
...@@ -918,9 +923,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -918,9 +923,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* we might need to revert back to that point */ /* we might need to revert back to that point */
data = *from; data = *from;
if (file->f_flags & O_DIRECT) if (file->f_flags & O_DIRECT)
written = ceph_sync_direct_write(iocb, &data); written = ceph_sync_direct_write(iocb, &data, pos);
else else
written = ceph_sync_write(iocb, &data); written = ceph_sync_write(iocb, &data, pos);
if (written == -EOLDSNAPC) { if (written == -EOLDSNAPC) {
dout("aio_write %p %llx.%llx %llu~%u" dout("aio_write %p %llx.%llx %llu~%u"
"got EOLDSNAPC, retrying\n", "got EOLDSNAPC, retrying\n",
...@@ -1177,6 +1182,9 @@ static long ceph_fallocate(struct file *file, int mode, ...@@ -1177,6 +1182,9 @@ static long ceph_fallocate(struct file *file, int mode,
loff_t endoff = 0; loff_t endoff = 0;
loff_t size; loff_t size;
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
if (!S_ISREG(inode->i_mode)) if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP; return -EOPNOTSUPP;
......
...@@ -1904,6 +1904,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, ...@@ -1904,6 +1904,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
if (req->r_got_unsafe) { if (req->r_got_unsafe) {
void *p;
/* /*
* Replay. Do not regenerate message (and rebuild * Replay. Do not regenerate message (and rebuild
* paths, etc.); just use the original message. * paths, etc.); just use the original message.
...@@ -1924,8 +1925,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, ...@@ -1924,8 +1925,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
/* remove cap/dentry releases from message */ /* remove cap/dentry releases from message */
rhead->num_releases = 0; rhead->num_releases = 0;
msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
msg->front.iov_len = req->r_request_release_offset; /* time stamp */
p = msg->front.iov_base + req->r_request_release_offset;
ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
return 0; return 0;
} }
...@@ -2061,11 +2067,12 @@ static void __wake_requests(struct ceph_mds_client *mdsc, ...@@ -2061,11 +2067,12 @@ static void __wake_requests(struct ceph_mds_client *mdsc,
static void kick_requests(struct ceph_mds_client *mdsc, int mds) static void kick_requests(struct ceph_mds_client *mdsc, int mds)
{ {
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct rb_node *p; struct rb_node *p = rb_first(&mdsc->request_tree);
dout("kick_requests mds%d\n", mds); dout("kick_requests mds%d\n", mds);
for (p = rb_first(&mdsc->request_tree); p; p = rb_next(p)) { while (p) {
req = rb_entry(p, struct ceph_mds_request, r_node); req = rb_entry(p, struct ceph_mds_request, r_node);
p = rb_next(p);
if (req->r_got_unsafe) if (req->r_got_unsafe)
continue; continue;
if (req->r_session && if (req->r_session &&
...@@ -2248,6 +2255,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) ...@@ -2248,6 +2255,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
*/ */
if (result == -ESTALE) { if (result == -ESTALE) {
dout("got ESTALE on request %llu", req->r_tid); dout("got ESTALE on request %llu", req->r_tid);
req->r_resend_mds = -1;
if (req->r_direct_mode != USE_AUTH_MDS) { if (req->r_direct_mode != USE_AUTH_MDS) {
dout("not using auth, setting for that now"); dout("not using auth, setting for that now");
req->r_direct_mode = USE_AUTH_MDS; req->r_direct_mode = USE_AUTH_MDS;
......
...@@ -592,12 +592,12 @@ static int __build_xattrs(struct inode *inode) ...@@ -592,12 +592,12 @@ static int __build_xattrs(struct inode *inode)
xattr_version = ci->i_xattrs.version; xattr_version = ci->i_xattrs.version;
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *), xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
GFP_NOFS); GFP_NOFS);
err = -ENOMEM; err = -ENOMEM;
if (!xattrs) if (!xattrs)
goto bad_lock; goto bad_lock;
memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *));
for (i = 0; i < numattr; i++) { for (i = 0; i < numattr; i++) {
xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr), xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
GFP_NOFS); GFP_NOFS);
......
...@@ -285,19 +285,9 @@ extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, ...@@ -285,19 +285,9 @@ extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
bool can_fail); bool can_fail);
extern void ceph_msg_kfree(struct ceph_msg *m);
extern struct ceph_msg *ceph_msg_get(struct ceph_msg *msg);
static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg) extern void ceph_msg_put(struct ceph_msg *msg);
{
kref_get(&msg->kref);
return msg;
}
extern void ceph_msg_last_put(struct kref *kref);
static inline void ceph_msg_put(struct ceph_msg *msg)
{
kref_put(&msg->kref, ceph_msg_last_put);
}
extern void ceph_msg_dump(struct ceph_msg *msg); extern void ceph_msg_dump(struct ceph_msg *msg);
......
...@@ -117,7 +117,7 @@ struct ceph_osd_request { ...@@ -117,7 +117,7 @@ struct ceph_osd_request {
struct list_head r_req_lru_item; struct list_head r_req_lru_item;
struct list_head r_osd_item; struct list_head r_osd_item;
struct list_head r_linger_item; struct list_head r_linger_item;
struct list_head r_linger_osd; struct list_head r_linger_osd_item;
struct ceph_osd *r_osd; struct ceph_osd *r_osd;
struct ceph_pg r_pgid; struct ceph_pg r_pgid;
int r_pg_osds[CEPH_PG_MAX_SIZE]; int r_pg_osds[CEPH_PG_MAX_SIZE];
...@@ -325,22 +325,14 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, ...@@ -325,22 +325,14 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
struct ceph_osd_request *req); struct ceph_osd_request *req);
extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req);
static inline void ceph_osdc_get_request(struct ceph_osd_request *req) extern void ceph_osdc_get_request(struct ceph_osd_request *req);
{ extern void ceph_osdc_put_request(struct ceph_osd_request *req);
kref_get(&req->r_kref);
}
extern void ceph_osdc_release_request(struct kref *kref);
static inline void ceph_osdc_put_request(struct ceph_osd_request *req)
{
kref_put(&req->r_kref, ceph_osdc_release_request);
}
extern int ceph_osdc_start_request(struct ceph_osd_client *osdc, extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req, struct ceph_osd_request *req,
bool nofail); bool nofail);
extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req); struct ceph_osd_request *req);
extern void ceph_osdc_sync(struct ceph_osd_client *osdc); extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
......
...@@ -174,6 +174,7 @@ static struct lock_class_key socket_class; ...@@ -174,6 +174,7 @@ static struct lock_class_key socket_class;
#define SKIP_BUF_SIZE 1024 #define SKIP_BUF_SIZE 1024
static void queue_con(struct ceph_connection *con); static void queue_con(struct ceph_connection *con);
static void cancel_con(struct ceph_connection *con);
static void con_work(struct work_struct *); static void con_work(struct work_struct *);
static void con_fault(struct ceph_connection *con); static void con_fault(struct ceph_connection *con);
...@@ -680,7 +681,7 @@ void ceph_con_close(struct ceph_connection *con) ...@@ -680,7 +681,7 @@ void ceph_con_close(struct ceph_connection *con)
reset_connection(con); reset_connection(con);
con->peer_global_seq = 0; con->peer_global_seq = 0;
cancel_delayed_work(&con->work); cancel_con(con);
con_close_socket(con); con_close_socket(con);
mutex_unlock(&con->mutex); mutex_unlock(&con->mutex);
} }
...@@ -900,7 +901,7 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor, ...@@ -900,7 +901,7 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
BUG_ON(page_count > (int)USHRT_MAX); BUG_ON(page_count > (int)USHRT_MAX);
cursor->page_count = (unsigned short)page_count; cursor->page_count = (unsigned short)page_count;
BUG_ON(length > SIZE_MAX - cursor->page_offset); BUG_ON(length > SIZE_MAX - cursor->page_offset);
cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE; cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE;
} }
static struct page * static struct page *
...@@ -2667,19 +2668,16 @@ static int queue_con_delay(struct ceph_connection *con, unsigned long delay) ...@@ -2667,19 +2668,16 @@ static int queue_con_delay(struct ceph_connection *con, unsigned long delay)
{ {
if (!con->ops->get(con)) { if (!con->ops->get(con)) {
dout("%s %p ref count 0\n", __func__, con); dout("%s %p ref count 0\n", __func__, con);
return -ENOENT; return -ENOENT;
} }
if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
dout("%s %p - already queued\n", __func__, con); dout("%s %p - already queued\n", __func__, con);
con->ops->put(con); con->ops->put(con);
return -EBUSY; return -EBUSY;
} }
dout("%s %p %lu\n", __func__, con, delay); dout("%s %p %lu\n", __func__, con, delay);
return 0; return 0;
} }
...@@ -2688,6 +2686,14 @@ static void queue_con(struct ceph_connection *con) ...@@ -2688,6 +2686,14 @@ static void queue_con(struct ceph_connection *con)
(void) queue_con_delay(con, 0); (void) queue_con_delay(con, 0);
} }
static void cancel_con(struct ceph_connection *con)
{
if (cancel_delayed_work(&con->work)) {
dout("%s %p\n", __func__, con);
con->ops->put(con);
}
}
static bool con_sock_closed(struct ceph_connection *con) static bool con_sock_closed(struct ceph_connection *con)
{ {
if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED)) if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED))
...@@ -3269,24 +3275,21 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) ...@@ -3269,24 +3275,21 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
/* /*
* Free a generically kmalloc'd message. * Free a generically kmalloc'd message.
*/ */
void ceph_msg_kfree(struct ceph_msg *m) static void ceph_msg_free(struct ceph_msg *m)
{ {
dout("msg_kfree %p\n", m); dout("%s %p\n", __func__, m);
ceph_kvfree(m->front.iov_base); ceph_kvfree(m->front.iov_base);
kmem_cache_free(ceph_msg_cache, m); kmem_cache_free(ceph_msg_cache, m);
} }
/* static void ceph_msg_release(struct kref *kref)
* Drop a msg ref. Destroy as needed.
*/
void ceph_msg_last_put(struct kref *kref)
{ {
struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
LIST_HEAD(data); LIST_HEAD(data);
struct list_head *links; struct list_head *links;
struct list_head *next; struct list_head *next;
dout("ceph_msg_put last one on %p\n", m); dout("%s %p\n", __func__, m);
WARN_ON(!list_empty(&m->list_head)); WARN_ON(!list_empty(&m->list_head));
/* drop middle, data, if any */ /* drop middle, data, if any */
...@@ -3308,9 +3311,25 @@ void ceph_msg_last_put(struct kref *kref) ...@@ -3308,9 +3311,25 @@ void ceph_msg_last_put(struct kref *kref)
if (m->pool) if (m->pool)
ceph_msgpool_put(m->pool, m); ceph_msgpool_put(m->pool, m);
else else
ceph_msg_kfree(m); ceph_msg_free(m);
}
struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
{
dout("%s %p (was %d)\n", __func__, msg,
atomic_read(&msg->kref.refcount));
kref_get(&msg->kref);
return msg;
}
EXPORT_SYMBOL(ceph_msg_get);
void ceph_msg_put(struct ceph_msg *msg)
{
dout("%s %p (was %d)\n", __func__, msg,
atomic_read(&msg->kref.refcount));
kref_put(&msg->kref, ceph_msg_release);
} }
EXPORT_SYMBOL(ceph_msg_last_put); EXPORT_SYMBOL(ceph_msg_put);
void ceph_msg_dump(struct ceph_msg *msg) void ceph_msg_dump(struct ceph_msg *msg)
{ {
......
...@@ -297,12 +297,21 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, ...@@ -297,12 +297,21 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
/* /*
* requests * requests
*/ */
void ceph_osdc_release_request(struct kref *kref) static void ceph_osdc_release_request(struct kref *kref)
{ {
struct ceph_osd_request *req; struct ceph_osd_request *req = container_of(kref,
struct ceph_osd_request, r_kref);
unsigned int which; unsigned int which;
req = container_of(kref, struct ceph_osd_request, r_kref); dout("%s %p (r_request %p r_reply %p)\n", __func__, req,
req->r_request, req->r_reply);
WARN_ON(!RB_EMPTY_NODE(&req->r_node));
WARN_ON(!list_empty(&req->r_req_lru_item));
WARN_ON(!list_empty(&req->r_osd_item));
WARN_ON(!list_empty(&req->r_linger_item));
WARN_ON(!list_empty(&req->r_linger_osd_item));
WARN_ON(req->r_osd);
if (req->r_request) if (req->r_request)
ceph_msg_put(req->r_request); ceph_msg_put(req->r_request);
if (req->r_reply) { if (req->r_reply) {
...@@ -320,7 +329,22 @@ void ceph_osdc_release_request(struct kref *kref) ...@@ -320,7 +329,22 @@ void ceph_osdc_release_request(struct kref *kref)
kmem_cache_free(ceph_osd_request_cache, req); kmem_cache_free(ceph_osd_request_cache, req);
} }
EXPORT_SYMBOL(ceph_osdc_release_request);
void ceph_osdc_get_request(struct ceph_osd_request *req)
{
dout("%s %p (was %d)\n", __func__, req,
atomic_read(&req->r_kref.refcount));
kref_get(&req->r_kref);
}
EXPORT_SYMBOL(ceph_osdc_get_request);
void ceph_osdc_put_request(struct ceph_osd_request *req)
{
dout("%s %p (was %d)\n", __func__, req,
atomic_read(&req->r_kref.refcount));
kref_put(&req->r_kref, ceph_osdc_release_request);
}
EXPORT_SYMBOL(ceph_osdc_put_request);
struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_snap_context *snapc, struct ceph_snap_context *snapc,
...@@ -364,7 +388,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, ...@@ -364,7 +388,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
RB_CLEAR_NODE(&req->r_node); RB_CLEAR_NODE(&req->r_node);
INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_unsafe_item);
INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_item);
INIT_LIST_HEAD(&req->r_linger_osd); INIT_LIST_HEAD(&req->r_linger_osd_item);
INIT_LIST_HEAD(&req->r_req_lru_item); INIT_LIST_HEAD(&req->r_req_lru_item);
INIT_LIST_HEAD(&req->r_osd_item); INIT_LIST_HEAD(&req->r_osd_item);
...@@ -916,7 +940,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, ...@@ -916,7 +940,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
* list at the end to keep things in tid order. * list at the end to keep things in tid order.
*/ */
list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
r_linger_osd) { r_linger_osd_item) {
/* /*
* reregister request prior to unregistering linger so * reregister request prior to unregistering linger so
* that r_osd is preserved. * that r_osd is preserved.
...@@ -1008,6 +1032,8 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) ...@@ -1008,6 +1032,8 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
{ {
dout("__remove_osd %p\n", osd); dout("__remove_osd %p\n", osd);
BUG_ON(!list_empty(&osd->o_requests)); BUG_ON(!list_empty(&osd->o_requests));
BUG_ON(!list_empty(&osd->o_linger_requests));
rb_erase(&osd->o_node, &osdc->osds); rb_erase(&osd->o_node, &osdc->osds);
list_del_init(&osd->o_osd_lru); list_del_init(&osd->o_osd_lru);
ceph_con_close(&osd->o_con); ceph_con_close(&osd->o_con);
...@@ -1029,12 +1055,23 @@ static void remove_all_osds(struct ceph_osd_client *osdc) ...@@ -1029,12 +1055,23 @@ static void remove_all_osds(struct ceph_osd_client *osdc)
static void __move_osd_to_lru(struct ceph_osd_client *osdc, static void __move_osd_to_lru(struct ceph_osd_client *osdc,
struct ceph_osd *osd) struct ceph_osd *osd)
{ {
dout("__move_osd_to_lru %p\n", osd); dout("%s %p\n", __func__, osd);
BUG_ON(!list_empty(&osd->o_osd_lru)); BUG_ON(!list_empty(&osd->o_osd_lru));
list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
} }
static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
struct ceph_osd *osd)
{
dout("%s %p\n", __func__, osd);
if (list_empty(&osd->o_requests) &&
list_empty(&osd->o_linger_requests))
__move_osd_to_lru(osdc, osd);
}
static void __remove_osd_from_lru(struct ceph_osd *osd) static void __remove_osd_from_lru(struct ceph_osd *osd)
{ {
dout("__remove_osd_from_lru %p\n", osd); dout("__remove_osd_from_lru %p\n", osd);
...@@ -1175,6 +1212,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, ...@@ -1175,6 +1212,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
dout("__unregister_request %p tid %lld\n", req, req->r_tid); dout("__unregister_request %p tid %lld\n", req, req->r_tid);
rb_erase(&req->r_node, &osdc->requests); rb_erase(&req->r_node, &osdc->requests);
RB_CLEAR_NODE(&req->r_node);
osdc->num_requests--; osdc->num_requests--;
if (req->r_osd) { if (req->r_osd) {
...@@ -1182,12 +1220,8 @@ static void __unregister_request(struct ceph_osd_client *osdc, ...@@ -1182,12 +1220,8 @@ static void __unregister_request(struct ceph_osd_client *osdc,
ceph_msg_revoke(req->r_request); ceph_msg_revoke(req->r_request);
list_del_init(&req->r_osd_item); list_del_init(&req->r_osd_item);
if (list_empty(&req->r_osd->o_requests) && maybe_move_osd_to_lru(osdc, req->r_osd);
list_empty(&req->r_osd->o_linger_requests)) { if (list_empty(&req->r_linger_osd_item))
dout("moving osd to %p lru\n", req->r_osd);
__move_osd_to_lru(osdc, req->r_osd);
}
if (list_empty(&req->r_linger_item))
req->r_osd = NULL; req->r_osd = NULL;
} }
...@@ -1214,45 +1248,39 @@ static void __cancel_request(struct ceph_osd_request *req) ...@@ -1214,45 +1248,39 @@ static void __cancel_request(struct ceph_osd_request *req)
static void __register_linger_request(struct ceph_osd_client *osdc, static void __register_linger_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req) struct ceph_osd_request *req)
{ {
dout("__register_linger_request %p\n", req); dout("%s %p tid %llu\n", __func__, req, req->r_tid);
WARN_ON(!req->r_linger);
ceph_osdc_get_request(req); ceph_osdc_get_request(req);
list_add_tail(&req->r_linger_item, &osdc->req_linger); list_add_tail(&req->r_linger_item, &osdc->req_linger);
if (req->r_osd) if (req->r_osd)
list_add_tail(&req->r_linger_osd, list_add_tail(&req->r_linger_osd_item,
&req->r_osd->o_linger_requests); &req->r_osd->o_linger_requests);
} }
static void __unregister_linger_request(struct ceph_osd_client *osdc, static void __unregister_linger_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req) struct ceph_osd_request *req)
{ {
dout("__unregister_linger_request %p\n", req); WARN_ON(!req->r_linger);
list_del_init(&req->r_linger_item);
if (req->r_osd) {
list_del_init(&req->r_linger_osd);
if (list_empty(&req->r_osd->o_requests) && if (list_empty(&req->r_linger_item)) {
list_empty(&req->r_osd->o_linger_requests)) { dout("%s %p tid %llu not registered\n", __func__, req,
dout("moving osd to %p lru\n", req->r_osd); req->r_tid);
__move_osd_to_lru(osdc, req->r_osd); return;
} }
dout("%s %p tid %llu\n", __func__, req, req->r_tid);
list_del_init(&req->r_linger_item);
if (req->r_osd) {
list_del_init(&req->r_linger_osd_item);
maybe_move_osd_to_lru(osdc, req->r_osd);
if (list_empty(&req->r_osd_item)) if (list_empty(&req->r_osd_item))
req->r_osd = NULL; req->r_osd = NULL;
} }
ceph_osdc_put_request(req); ceph_osdc_put_request(req);
} }
void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req)
{
mutex_lock(&osdc->request_mutex);
if (req->r_linger) {
req->r_linger = 0;
__unregister_linger_request(osdc, req);
}
mutex_unlock(&osdc->request_mutex);
}
EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
struct ceph_osd_request *req) struct ceph_osd_request *req)
{ {
...@@ -2429,6 +2457,25 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, ...@@ -2429,6 +2457,25 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
} }
EXPORT_SYMBOL(ceph_osdc_start_request); EXPORT_SYMBOL(ceph_osdc_start_request);
/*
* Unregister a registered request. The request is not completed (i.e.
* no callbacks or wakeups) - higher layers are supposed to know what
* they are canceling.
*/
void ceph_osdc_cancel_request(struct ceph_osd_request *req)
{
struct ceph_osd_client *osdc = req->r_osdc;
mutex_lock(&osdc->request_mutex);
if (req->r_linger)
__unregister_linger_request(osdc, req);
__unregister_request(osdc, req);
mutex_unlock(&osdc->request_mutex);
dout("%s %p tid %llu canceled\n", __func__, req, req->r_tid);
}
EXPORT_SYMBOL(ceph_osdc_cancel_request);
/* /*
* wait for a request to complete * wait for a request to complete
*/ */
...@@ -2437,18 +2484,18 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, ...@@ -2437,18 +2484,18 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
{ {
int rc; int rc;
dout("%s %p tid %llu\n", __func__, req, req->r_tid);
rc = wait_for_completion_interruptible(&req->r_completion); rc = wait_for_completion_interruptible(&req->r_completion);
if (rc < 0) { if (rc < 0) {
mutex_lock(&osdc->request_mutex); dout("%s %p tid %llu interrupted\n", __func__, req, req->r_tid);
__cancel_request(req); ceph_osdc_cancel_request(req);
__unregister_request(osdc, req);
mutex_unlock(&osdc->request_mutex);
complete_request(req); complete_request(req);
dout("wait_request tid %llu canceled/timed out\n", req->r_tid);
return rc; return rc;
} }
dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); dout("%s %p tid %llu result %d\n", __func__, req, req->r_tid,
req->r_result);
return req->r_result; return req->r_result;
} }
EXPORT_SYMBOL(ceph_osdc_wait_request); EXPORT_SYMBOL(ceph_osdc_wait_request);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment