Commit 87ef1202 authored by Linus Torvalds

Merge tag 'ceph-for-4.17-rc2' of git://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "A couple of follow-up patches for -rc1 changes in rbd, support for a
  timeout on waiting for the acquisition of exclusive lock and a fix for
  uninitialized memory access in CephFS, marked for stable"

* tag 'ceph-for-4.17-rc2' of git://github.com/ceph/ceph-client:
  rbd: notrim map option
  rbd: adjust queue limits for "fancy" striping
  rbd: avoid Wreturn-type warnings
  ceph: always update atime/mtime/ctime for new inode
  rbd: support timeout in rbd_wait_state_locked()
  rbd: refactor rbd_wait_state_locked()
parents a27fc142 d9360540
...@@ -732,6 +732,7 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) ...@@ -732,6 +732,7 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
*/ */
enum { enum {
Opt_queue_depth, Opt_queue_depth,
Opt_lock_timeout,
Opt_last_int, Opt_last_int,
/* int args above */ /* int args above */
Opt_last_string, Opt_last_string,
...@@ -740,11 +741,13 @@ enum { ...@@ -740,11 +741,13 @@ enum {
Opt_read_write, Opt_read_write,
Opt_lock_on_read, Opt_lock_on_read,
Opt_exclusive, Opt_exclusive,
Opt_notrim,
Opt_err Opt_err
}; };
static match_table_t rbd_opts_tokens = { static match_table_t rbd_opts_tokens = {
{Opt_queue_depth, "queue_depth=%d"}, {Opt_queue_depth, "queue_depth=%d"},
{Opt_lock_timeout, "lock_timeout=%d"},
/* int args above */ /* int args above */
/* string args above */ /* string args above */
{Opt_read_only, "read_only"}, {Opt_read_only, "read_only"},
...@@ -753,20 +756,25 @@ static match_table_t rbd_opts_tokens = { ...@@ -753,20 +756,25 @@ static match_table_t rbd_opts_tokens = {
{Opt_read_write, "rw"}, /* Alternate spelling */ {Opt_read_write, "rw"}, /* Alternate spelling */
{Opt_lock_on_read, "lock_on_read"}, {Opt_lock_on_read, "lock_on_read"},
{Opt_exclusive, "exclusive"}, {Opt_exclusive, "exclusive"},
{Opt_notrim, "notrim"},
{Opt_err, NULL} {Opt_err, NULL}
}; };
struct rbd_options { struct rbd_options {
int queue_depth; int queue_depth;
unsigned long lock_timeout;
bool read_only; bool read_only;
bool lock_on_read; bool lock_on_read;
bool exclusive; bool exclusive;
bool trim;
}; };
#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ #define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ
#define RBD_LOCK_TIMEOUT_DEFAULT 0 /* no timeout */
#define RBD_READ_ONLY_DEFAULT false #define RBD_READ_ONLY_DEFAULT false
#define RBD_LOCK_ON_READ_DEFAULT false #define RBD_LOCK_ON_READ_DEFAULT false
#define RBD_EXCLUSIVE_DEFAULT false #define RBD_EXCLUSIVE_DEFAULT false
#define RBD_TRIM_DEFAULT true
static int parse_rbd_opts_token(char *c, void *private) static int parse_rbd_opts_token(char *c, void *private)
{ {
...@@ -796,6 +804,14 @@ static int parse_rbd_opts_token(char *c, void *private) ...@@ -796,6 +804,14 @@ static int parse_rbd_opts_token(char *c, void *private)
} }
rbd_opts->queue_depth = intval; rbd_opts->queue_depth = intval;
break; break;
case Opt_lock_timeout:
/* 0 is "wait forever" (i.e. infinite timeout) */
if (intval < 0 || intval > INT_MAX / 1000) {
pr_err("lock_timeout out of range\n");
return -EINVAL;
}
rbd_opts->lock_timeout = msecs_to_jiffies(intval * 1000);
break;
case Opt_read_only: case Opt_read_only:
rbd_opts->read_only = true; rbd_opts->read_only = true;
break; break;
...@@ -808,6 +824,9 @@ static int parse_rbd_opts_token(char *c, void *private) ...@@ -808,6 +824,9 @@ static int parse_rbd_opts_token(char *c, void *private)
case Opt_exclusive: case Opt_exclusive:
rbd_opts->exclusive = true; rbd_opts->exclusive = true;
break; break;
case Opt_notrim:
rbd_opts->trim = false;
break;
default: default:
/* libceph prints "bad option" msg */ /* libceph prints "bad option" msg */
return -EINVAL; return -EINVAL;
...@@ -1392,7 +1411,7 @@ static bool rbd_img_is_write(struct rbd_img_request *img_req) ...@@ -1392,7 +1411,7 @@ static bool rbd_img_is_write(struct rbd_img_request *img_req)
case OBJ_OP_DISCARD: case OBJ_OP_DISCARD:
return true; return true;
default: default:
rbd_assert(0); BUG();
} }
} }
...@@ -2466,7 +2485,7 @@ static bool rbd_obj_handle_write(struct rbd_obj_request *obj_req) ...@@ -2466,7 +2485,7 @@ static bool rbd_obj_handle_write(struct rbd_obj_request *obj_req)
} }
return false; return false;
default: default:
rbd_assert(0); BUG();
} }
} }
...@@ -2494,7 +2513,7 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req) ...@@ -2494,7 +2513,7 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req)
} }
return false; return false;
default: default:
rbd_assert(0); BUG();
} }
} }
...@@ -3533,9 +3552,22 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, ...@@ -3533,9 +3552,22 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
/* /*
* lock_rwsem must be held for read * lock_rwsem must be held for read
*/ */
static void rbd_wait_state_locked(struct rbd_device *rbd_dev) static int rbd_wait_state_locked(struct rbd_device *rbd_dev, bool may_acquire)
{ {
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
unsigned long timeout;
int ret = 0;
if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
return -EBLACKLISTED;
if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
return 0;
if (!may_acquire) {
rbd_warn(rbd_dev, "exclusive lock required");
return -EROFS;
}
do { do {
/* /*
...@@ -3547,12 +3579,22 @@ static void rbd_wait_state_locked(struct rbd_device *rbd_dev) ...@@ -3547,12 +3579,22 @@ static void rbd_wait_state_locked(struct rbd_device *rbd_dev)
prepare_to_wait_exclusive(&rbd_dev->lock_waitq, &wait, prepare_to_wait_exclusive(&rbd_dev->lock_waitq, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
up_read(&rbd_dev->lock_rwsem); up_read(&rbd_dev->lock_rwsem);
schedule(); timeout = schedule_timeout(ceph_timeout_jiffies(
rbd_dev->opts->lock_timeout));
down_read(&rbd_dev->lock_rwsem); down_read(&rbd_dev->lock_rwsem);
} while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)); ret = -EBLACKLISTED;
break;
}
if (!timeout) {
rbd_warn(rbd_dev, "timed out waiting for lock");
ret = -ETIMEDOUT;
break;
}
} while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);
finish_wait(&rbd_dev->lock_waitq, &wait); finish_wait(&rbd_dev->lock_waitq, &wait);
return ret;
} }
static void rbd_queue_workfn(struct work_struct *work) static void rbd_queue_workfn(struct work_struct *work)
...@@ -3638,19 +3680,10 @@ static void rbd_queue_workfn(struct work_struct *work) ...@@ -3638,19 +3680,10 @@ static void rbd_queue_workfn(struct work_struct *work)
(op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read); (op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read);
if (must_be_locked) { if (must_be_locked) {
down_read(&rbd_dev->lock_rwsem); down_read(&rbd_dev->lock_rwsem);
if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && result = rbd_wait_state_locked(rbd_dev,
!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { !rbd_dev->opts->exclusive);
if (rbd_dev->opts->exclusive) { if (result)
rbd_warn(rbd_dev, "exclusive lock required");
result = -EROFS;
goto err_unlock;
}
rbd_wait_state_locked(rbd_dev);
}
if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
result = -EBLACKLISTED;
goto err_unlock; goto err_unlock;
}
} }
img_request = rbd_img_request_create(rbd_dev, op_type, snapc); img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
...@@ -3902,7 +3935,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) ...@@ -3902,7 +3935,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
{ {
struct gendisk *disk; struct gendisk *disk;
struct request_queue *q; struct request_queue *q;
u64 segment_size; unsigned int objset_bytes =
rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
int err; int err;
/* create gendisk info */ /* create gendisk info */
...@@ -3942,20 +3976,19 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) ...@@ -3942,20 +3976,19 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
blk_queue_flag_set(QUEUE_FLAG_NONROT, q); blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
/* set io sizes to object size */ blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT);
segment_size = rbd_obj_bytes(&rbd_dev->header);
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
q->limits.max_sectors = queue_max_hw_sectors(q); q->limits.max_sectors = queue_max_hw_sectors(q);
blk_queue_max_segments(q, USHRT_MAX); blk_queue_max_segments(q, USHRT_MAX);
blk_queue_max_segment_size(q, UINT_MAX); blk_queue_max_segment_size(q, UINT_MAX);
blk_queue_io_min(q, segment_size); blk_queue_io_min(q, objset_bytes);
blk_queue_io_opt(q, segment_size); blk_queue_io_opt(q, objset_bytes);
/* enable the discard support */ if (rbd_dev->opts->trim) {
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
q->limits.discard_granularity = segment_size; q->limits.discard_granularity = objset_bytes;
blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
}
if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
...@@ -5179,8 +5212,10 @@ static int rbd_add_parse_args(const char *buf, ...@@ -5179,8 +5212,10 @@ static int rbd_add_parse_args(const char *buf,
rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT; rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
rbd_opts->trim = RBD_TRIM_DEFAULT;
copts = ceph_parse_options(options, mon_addrs, copts = ceph_parse_options(options, mon_addrs,
mon_addrs + mon_addrs_size - 1, mon_addrs + mon_addrs_size - 1,
...@@ -5216,6 +5251,8 @@ static void rbd_dev_image_unlock(struct rbd_device *rbd_dev) ...@@ -5216,6 +5251,8 @@ static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
{ {
int ret;
if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) { if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
rbd_warn(rbd_dev, "exclusive-lock feature is not enabled"); rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
return -EINVAL; return -EINVAL;
...@@ -5223,9 +5260,9 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) ...@@ -5223,9 +5260,9 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
/* FIXME: "rbd map --exclusive" should be in interruptible */ /* FIXME: "rbd map --exclusive" should be in interruptible */
down_read(&rbd_dev->lock_rwsem); down_read(&rbd_dev->lock_rwsem);
rbd_wait_state_locked(rbd_dev); ret = rbd_wait_state_locked(rbd_dev, true);
up_read(&rbd_dev->lock_rwsem); up_read(&rbd_dev->lock_rwsem);
if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { if (ret) {
rbd_warn(rbd_dev, "failed to acquire exclusive lock"); rbd_warn(rbd_dev, "failed to acquire exclusive lock");
return -EROFS; return -EROFS;
} }
......
...@@ -669,13 +669,15 @@ void ceph_fill_file_time(struct inode *inode, int issued, ...@@ -669,13 +669,15 @@ void ceph_fill_file_time(struct inode *inode, int issued,
CEPH_CAP_FILE_BUFFER| CEPH_CAP_FILE_BUFFER|
CEPH_CAP_AUTH_EXCL| CEPH_CAP_AUTH_EXCL|
CEPH_CAP_XATTR_EXCL)) { CEPH_CAP_XATTR_EXCL)) {
if (timespec_compare(ctime, &inode->i_ctime) > 0) { if (ci->i_version == 0 ||
timespec_compare(ctime, &inode->i_ctime) > 0) {
dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
ctime->tv_sec, ctime->tv_nsec); ctime->tv_sec, ctime->tv_nsec);
inode->i_ctime = *ctime; inode->i_ctime = *ctime;
} }
if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { if (ci->i_version == 0 ||
ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
/* the MDS did a utimes() */ /* the MDS did a utimes() */
dout("mtime %ld.%09ld -> %ld.%09ld " dout("mtime %ld.%09ld -> %ld.%09ld "
"tw %d -> %d\n", "tw %d -> %d\n",
...@@ -795,7 +797,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ...@@ -795,7 +797,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
new_issued = ~issued & le32_to_cpu(info->cap.caps); new_issued = ~issued & le32_to_cpu(info->cap.caps);
/* update inode */ /* update inode */
ci->i_version = le64_to_cpu(info->version);
inode->i_rdev = le32_to_cpu(info->rdev); inode->i_rdev = le32_to_cpu(info->rdev);
inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
...@@ -868,6 +869,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ...@@ -868,6 +869,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
xattr_blob = NULL; xattr_blob = NULL;
} }
/* finally update i_version */
ci->i_version = le64_to_cpu(info->version);
inode->i_mapping->a_ops = &ceph_aops; inode->i_mapping->a_ops = &ceph_aops;
switch (inode->i_mode & S_IFMT) { switch (inode->i_mode & S_IFMT) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment