Commit 8d829b9b authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "This contains a set of fixes for xen-blkback by way of Konrad, and a
  performance regression fix for blk-mq for shared tags.

  The latter could account for as much as a 50x reduction in
  performance, with the test case from the user with 500 name spaces. A
  more realistic setup on my end with 32 drives showed a 3.5x drop. The
  fix has been thoroughly tested before being committed"

* 'for-linus' of git://git.kernel.dk/linux-block:
  blk-mq: fix performance regression with shared tags
  xen-blkback: don't leak stack data via response ring
  xen/blkback: don't use xen_blkif_get() in xen-blkback kthread
  xen/blkback: don't free be structure too early
  xen/blkback: fix disconnect while I/Os in flight
parents 48b6bbef 8e8320c9
......@@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q,
__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
}
/*
* Mark a hardware queue as needing a restart. For shared queues, maintain
* a count of how many hardware queues are marked for restart.
*
* @hctx: hardware queue whose BLK_MQ_S_SCHED_RESTART state bit gets set.
*
* Nothing is done when the bit is already set. When hctx->flags contains
* BLK_MQ_F_TAG_SHARED, hctx->queue->shared_hctx_restart is incremented,
* but only by the call that actually flips the bit from clear to set.
*/
static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
/* Already marked: leave both the bit and the counter untouched. */
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return;
if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
struct request_queue *q = hctx->queue;
/*
* test_and_set_bit() returns the previous bit value, so the counter
* is bumped exactly once per clear->set transition.
* NOTE(review): presumably this re-check guards against another
* context setting the bit after the test_bit() above - confirm.
*/
if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_inc(&q->shared_hctx_restart);
} else
/* Non-shared queue: no counter to maintain, just set the bit. */
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
/*
* Clear the restart mark on a hardware queue and re-run it if it has
* pending work.
*
* @hctx: hardware queue to check.
*
* Returns false right away when BLK_MQ_S_SCHED_RESTART is not set in
* hctx->state. Otherwise the bit is cleared; for queues flagged
* BLK_MQ_F_TAG_SHARED, hctx->queue->shared_hctx_restart is decremented
* by the call that actually cleared the bit. Afterwards, if
* blk_mq_hctx_has_pending() reports pending work, the queue is run with
* blk_mq_run_hw_queue(hctx, true).
*
* Return: true if blk_mq_run_hw_queue() was called, false otherwise.
*/
static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
/* Not marked for restart: nothing to clear, nothing to run. */
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return false;
if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
struct request_queue *q = hctx->queue;
/*
* test_and_clear_bit() returns the previous bit value, so the
* counter is dropped exactly once per set->clear transition.
*/
if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_dec(&q->shared_hctx_restart);
} else
/* Non-shared queue: no counter to maintain, just clear the bit. */
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
/* The mark is cleared before checking for work to dispatch. */
if (blk_mq_hctx_has_pending(hctx)) {
blk_mq_run_hw_queue(hctx, true);
return true;
}
return false;
}
struct request *blk_mq_sched_get_request(struct request_queue *q,
struct bio *bio,
unsigned int op,
......@@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
return true;
}
/*
* If BLK_MQ_S_SCHED_RESTART is set in hctx->state, clear it and, when
* blk_mq_hctx_has_pending() reports pending work, run the hardware queue
* with blk_mq_run_hw_queue(hctx, true).
*
* This variant only touches the state bit; it does not maintain any
* per-request_queue restart counter.
*
* @hctx: hardware queue to check.
*
* Return: true if blk_mq_run_hw_queue() was called, false otherwise
* (bit not set, or bit cleared but nothing pending).
*/
static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
/* Clear the mark first, then look for work to dispatch. */
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
if (blk_mq_hctx_has_pending(hctx)) {
blk_mq_run_hw_queue(hctx, true);
return true;
}
}
return false;
}
/**
* list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
* @pos: loop cursor.
......@@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
unsigned int i, j;
if (set->flags & BLK_MQ_F_TAG_SHARED) {
/*
* If this is 0, then we know that no hardware queues
* have RESTART marked. We're done.
*/
if (!atomic_read(&queue->shared_hctx_restart))
return;
rcu_read_lock();
list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
tag_set_list) {
......
......@@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
return false;
}
/*
* Mark a hardware queue as needing a restart.
*
* @hctx: hardware queue whose BLK_MQ_S_SCHED_RESTART state bit gets set.
*
* The bit is tested first and set_bit() is only issued when it is not
* already set.
* NOTE(review): presumably the test avoids an atomic write when the
* queue is already marked - confirm.
*/
static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
{
return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
......
......@@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
}
}
/*
* Caller needs to ensure that we're either frozen/quiesced, or that
* the queue isn't live yet.
*/
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
if (shared)
if (shared) {
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_inc(&q->shared_hctx_restart);
hctx->flags |= BLK_MQ_F_TAG_SHARED;
else
} else {
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_dec(&q->shared_hctx_restart);
hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
}
}
}
static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
bool shared)
{
struct request_queue *q;
......
......@@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg)
unsigned long timeout;
int ret;
xen_blkif_get(blkif);
set_freezable();
while (!kthread_should_stop()) {
if (try_to_freeze())
......@@ -665,7 +663,6 @@ int xen_blkif_schedule(void *arg)
print_stats(ring);
ring->xenblkd = NULL;
xen_blkif_put(blkif);
return 0;
}
......@@ -1436,34 +1433,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
static void make_response(struct xen_blkif_ring *ring, u64 id,
unsigned short op, int st)
{
struct blkif_response resp;
struct blkif_response *resp;
unsigned long flags;
union blkif_back_rings *blk_rings;
int notify;
resp.id = id;
resp.operation = op;
resp.status = st;
spin_lock_irqsave(&ring->blk_ring_lock, flags);
blk_rings = &ring->blk_rings;
/* Place on the response ring for the relevant domain. */
switch (ring->blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
&resp, sizeof(resp));
resp = RING_GET_RESPONSE(&blk_rings->native,
blk_rings->native.rsp_prod_pvt);
break;
case BLKIF_PROTOCOL_X86_32:
memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
&resp, sizeof(resp));
resp = RING_GET_RESPONSE(&blk_rings->x86_32,
blk_rings->x86_32.rsp_prod_pvt);
break;
case BLKIF_PROTOCOL_X86_64:
memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
&resp, sizeof(resp));
resp = RING_GET_RESPONSE(&blk_rings->x86_64,
blk_rings->x86_64.rsp_prod_pvt);
break;
default:
BUG();
}
resp->id = id;
resp->operation = op;
resp->status = st;
blk_rings->common.rsp_prod_pvt++;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
......
......@@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues;
/*
* Placeholder request type consisting of a single dummy byte. It is the
* request type handed to DEFINE_RING_TYPES() for the blkif_common ring.
*/
struct blkif_common_request {
char dummy;
};
/*
* Placeholder response type consisting of a single dummy byte, the
* response-side counterpart of struct blkif_common_request.
*/
struct blkif_common_response {
char dummy;
};
/* i386 protocol version */
struct blkif_x86_32_request_rw {
uint8_t nr_segments; /* number of segments */
......@@ -129,14 +128,6 @@ struct blkif_x86_32_request {
} u;
} __attribute__((__packed__));
/*
* i386 protocol version
*
* Response layout for the x86_32 ring. The surrounding
* #pragma pack(push, 4) / pack(pop) pair caps member alignment at
* 4 bytes for this structure only, so the 64-bit id does not force
* 8-byte alignment or tail padding of the structure.
*/
#pragma pack(push, 4)
struct blkif_x86_32_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
#pragma pack(pop)
/* x86_64 protocol version */
struct blkif_x86_64_request_rw {
......@@ -193,18 +184,12 @@ struct blkif_x86_64_request {
} u;
} __attribute__((__packed__));
/*
* Response layout for the x86_64 ring. The id member is explicitly
* aligned to 8 bytes, which also gives the structure as a whole an
* alignment of at least 8 bytes.
*/
struct blkif_x86_64_response {
uint64_t __attribute__((__aligned__(8))) id; /* 8-byte aligned */
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
struct blkif_common_response);
struct blkif_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
struct blkif_x86_32_response);
struct blkif_response __packed);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
struct blkif_x86_64_response);
struct blkif_response);
union blkif_back_rings {
struct blkif_back_ring native;
......@@ -281,6 +266,7 @@ struct xen_blkif_ring {
wait_queue_head_t wq;
atomic_t inflight;
bool active;
/* One thread per blkif ring. */
struct task_struct *xenblkd;
unsigned int waiting_reqs;
......
......@@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
init_waitqueue_head(&ring->shutdown_wq);
ring->blkif = blkif;
ring->st_print = jiffies;
xen_blkif_get(blkif);
ring->active = true;
}
return 0;
......@@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
struct xen_blkif_ring *ring = &blkif->rings[r];
unsigned int i = 0;
if (!ring->active)
continue;
if (ring->xenblkd) {
kthread_stop(ring->xenblkd);
wake_up(&ring->shutdown_wq);
ring->xenblkd = NULL;
}
/* The above kthread_stop() guarantees that at this point we
......@@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
BUG_ON(ring->free_pages_num != 0);
BUG_ON(ring->persistent_gnt_c != 0);
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
xen_blkif_put(blkif);
ring->active = false;
}
blkif->nr_ring_pages = 0;
/*
......@@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
static void xen_blkif_free(struct xen_blkif *blkif)
{
xen_blkif_disconnect(blkif);
WARN_ON(xen_blkif_disconnect(blkif));
xen_vbd_free(&blkif->vbd);
kfree(blkif->be->mode);
kfree(blkif->be);
/* Make sure everything is drained before shutting down */
kmem_cache_free(xen_blkif_cachep, blkif);
......@@ -511,8 +514,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
xen_blkif_put(be->blkif);
}
kfree(be->mode);
kfree(be);
return 0;
}
......
......@@ -391,6 +391,8 @@ struct request_queue {
int nr_rqs[2]; /* # allocated [a]sync rqs */
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
atomic_t shared_hctx_restart;
struct blk_queue_stats *stats;
struct rq_wb *rq_wb;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment