Commit a4a1cc16 authored by Jens Axboe's avatar Jens Axboe

Merge branch 'for-3.20/core' into for-3.20/drivers

We need the tagging changes for the libata conversion.
parents 121c7ad4 24391c0d
......@@ -283,23 +283,45 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
* @discard: whether to discard the block range
*
* Description:
* Generate and issue number of bios with zerofiled pages.
* Zero-fill a block range. If the discard flag is set and the block
* device guarantees that subsequent READ operations to the block range
* in question will return zeroes, the blocks will be discarded. Should
* the discard request fail, if the discard flag is not set, or if
* discard_zeroes_data is not supported, this function will resort to
* zeroing the blocks manually, thus provisioning (allocating,
* anchoring) them. If the block device supports the WRITE SAME command
* blkdev_issue_zeroout() will use it to optimize the process of
* clearing the block range. Otherwise the zeroing will be performed
* using regular WRITE calls.
*/
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask)
sector_t nr_sects, gfp_t gfp_mask, bool discard)
{
struct request_queue *q = bdev_get_queue(bdev);
unsigned char bdn[BDEVNAME_SIZE];
if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data) {
if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0))
return 0;
bdevname(bdev, bdn);
pr_warn("%s: DISCARD failed. Manually zeroing.\n", bdn);
}
if (bdev_write_same(bdev)) {
unsigned char bdn[BDEVNAME_SIZE];
if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
ZERO_PAGE(0)))
return 0;
bdevname(bdev, bdn);
pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
pr_warn("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
}
return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
......
......@@ -140,35 +140,39 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
return atomic_read(&hctx->nr_active) < depth;
}
static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag,
bool nowrap)
{
int tag, org_last_tag, end;
bool wrap = last_tag != 0;
int tag, org_last_tag = last_tag;
org_last_tag = last_tag;
end = bm->depth;
do {
restart:
tag = find_next_zero_bit(&bm->word, end, last_tag);
if (unlikely(tag >= end)) {
while (1) {
tag = find_next_zero_bit(&bm->word, bm->depth, last_tag);
if (unlikely(tag >= bm->depth)) {
/*
* We started with an offset, start from 0 to
* We started with an offset, and we didn't reset the
* offset to 0 in a failure case, so start from 0 to
* exhaust the map.
*/
if (wrap) {
wrap = false;
end = org_last_tag;
last_tag = 0;
goto restart;
if (org_last_tag && last_tag && !nowrap) {
last_tag = org_last_tag = 0;
continue;
}
return -1;
}
if (!test_and_set_bit(tag, &bm->word))
break;
last_tag = tag + 1;
} while (test_and_set_bit(tag, &bm->word));
if (last_tag >= bm->depth - 1)
last_tag = 0;
}
return tag;
}
#define BT_ALLOC_RR(tags) (tags->alloc_policy == BLK_TAG_ALLOC_RR)
/*
* Straight forward bitmap tag implementation, where each bit is a tag
* (cleared == free, and set == busy). The small twist is using per-cpu
......@@ -181,7 +185,7 @@ static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
* until the map is exhausted.
*/
static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
unsigned int *tag_cache)
unsigned int *tag_cache, struct blk_mq_tags *tags)
{
unsigned int last_tag, org_last_tag;
int index, i, tag;
......@@ -193,15 +197,24 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
index = TAG_TO_INDEX(bt, last_tag);
for (i = 0; i < bt->map_nr; i++) {
tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag));
tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag),
BT_ALLOC_RR(tags));
if (tag != -1) {
tag += (index << bt->bits_per_word);
goto done;
}
last_tag = 0;
if (++index >= bt->map_nr)
/*
* Jump to next index, and reset the last tag to be the
* first tag of that index
*/
index++;
last_tag = (index << bt->bits_per_word);
if (index >= bt->map_nr) {
index = 0;
last_tag = 0;
}
}
*tag_cache = 0;
......@@ -212,7 +225,7 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
* up using the specific cached tag.
*/
done:
if (tag == org_last_tag) {
if (tag == org_last_tag || unlikely(BT_ALLOC_RR(tags))) {
last_tag = tag + 1;
if (last_tag >= bt->depth - 1)
last_tag = 0;
......@@ -241,13 +254,13 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
static int bt_get(struct blk_mq_alloc_data *data,
struct blk_mq_bitmap_tags *bt,
struct blk_mq_hw_ctx *hctx,
unsigned int *last_tag)
unsigned int *last_tag, struct blk_mq_tags *tags)
{
struct bt_wait_state *bs;
DEFINE_WAIT(wait);
int tag;
tag = __bt_get(hctx, bt, last_tag);
tag = __bt_get(hctx, bt, last_tag, tags);
if (tag != -1)
return tag;
......@@ -258,7 +271,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
do {
prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
tag = __bt_get(hctx, bt, last_tag);
tag = __bt_get(hctx, bt, last_tag, tags);
if (tag != -1)
break;
......@@ -273,7 +286,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
* Retry tag allocation after running the hardware queue,
* as running the queue may also have found completions.
*/
tag = __bt_get(hctx, bt, last_tag);
tag = __bt_get(hctx, bt, last_tag, tags);
if (tag != -1)
break;
......@@ -304,7 +317,7 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
int tag;
tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
&data->ctx->last_tag);
&data->ctx->last_tag, data->hctx->tags);
if (tag >= 0)
return tag + data->hctx->tags->nr_reserved_tags;
......@@ -320,7 +333,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
return BLK_MQ_TAG_FAIL;
}
tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero);
tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero,
data->hctx->tags);
if (tag < 0)
return BLK_MQ_TAG_FAIL;
......@@ -392,7 +406,8 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
BUG_ON(real_tag >= tags->nr_tags);
bt_clear_tag(&tags->bitmap_tags, real_tag);
*last_tag = real_tag;
if (likely(tags->alloc_policy == BLK_TAG_ALLOC_FIFO))
*last_tag = real_tag;
} else {
BUG_ON(tag >= tags->nr_reserved_tags);
bt_clear_tag(&tags->breserved_tags, tag);
......@@ -529,10 +544,12 @@ static void bt_free(struct blk_mq_bitmap_tags *bt)
}
static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
int node)
int node, int alloc_policy)
{
unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
tags->alloc_policy = alloc_policy;
if (bt_alloc(&tags->bitmap_tags, depth, node, false))
goto enomem;
if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
......@@ -546,7 +563,8 @@ static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
}
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
unsigned int reserved_tags, int node)
unsigned int reserved_tags,
int node, int alloc_policy)
{
struct blk_mq_tags *tags;
......@@ -562,7 +580,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
tags->nr_tags = total_tags;
tags->nr_reserved_tags = reserved_tags;
return blk_mq_init_bitmap_tags(tags, node);
return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
}
void blk_mq_free_tags(struct blk_mq_tags *tags)
......
......@@ -42,10 +42,12 @@ struct blk_mq_tags {
struct request **rqs;
struct list_head page_list;
int alloc_policy;
};
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
extern void blk_mq_free_tags(struct blk_mq_tags *tags);
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
......
......@@ -1374,7 +1374,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
size_t rq_size, left;
tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
set->numa_node);
set->numa_node,
BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
if (!tags)
return NULL;
......
......@@ -119,7 +119,7 @@ init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth)
}
static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
int depth)
int depth, int alloc_policy)
{
struct blk_queue_tag *tags;
......@@ -131,6 +131,8 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
goto fail;
atomic_set(&tags->refcnt, 1);
tags->alloc_policy = alloc_policy;
tags->next_tag = 0;
return tags;
fail:
kfree(tags);
......@@ -140,10 +142,11 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
/**
* blk_init_tags - initialize the tag info for an external tag map
* @depth: the maximum queue depth supported
* @alloc_policy: tag allocation policy
**/
struct blk_queue_tag *blk_init_tags(int depth)
struct blk_queue_tag *blk_init_tags(int depth, int alloc_policy)
{
return __blk_queue_init_tags(NULL, depth);
return __blk_queue_init_tags(NULL, depth, alloc_policy);
}
EXPORT_SYMBOL(blk_init_tags);
......@@ -152,19 +155,20 @@ EXPORT_SYMBOL(blk_init_tags);
* @q: the request queue for the device
* @depth: the maximum queue depth supported
* @tags: the tag to use
* @alloc_policy: tag allocation policy
*
* Queue lock must be held here if the function is called to resize an
* existing map.
**/
int blk_queue_init_tags(struct request_queue *q, int depth,
struct blk_queue_tag *tags)
struct blk_queue_tag *tags, int alloc_policy)
{
int rc;
BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
if (!tags && !q->queue_tags) {
tags = __blk_queue_init_tags(q, depth);
tags = __blk_queue_init_tags(q, depth, alloc_policy);
if (!tags)
return -ENOMEM;
......@@ -344,9 +348,21 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
}
do {
tag = find_first_zero_bit(bqt->tag_map, max_depth);
if (tag >= max_depth)
return 1;
if (bqt->alloc_policy == BLK_TAG_ALLOC_FIFO) {
tag = find_first_zero_bit(bqt->tag_map, max_depth);
if (tag >= max_depth)
return 1;
} else {
int start = bqt->next_tag;
int size = min_t(int, bqt->max_depth, max_depth + start);
tag = find_next_zero_bit(bqt->tag_map, size, start);
if (tag >= size && start + size > bqt->max_depth) {
size = start + size - bqt->max_depth;
tag = find_first_zero_bit(bqt->tag_map, size);
}
if (tag >= size)
return 1;
}
} while (test_and_set_bit_lock(tag, bqt->tag_map));
/*
......@@ -354,6 +370,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
* See blk_queue_end_tag for details.
*/
bqt->next_tag = (tag + 1) % bqt->max_depth;
rq->cmd_flags |= REQ_QUEUED;
rq->tag = tag;
bqt->tag_index[tag] = rq;
......
......@@ -3656,12 +3656,17 @@ static struct cfq_queue *
cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
struct bio *bio, gfp_t gfp_mask)
{
const int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
const int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
struct cfq_queue **async_cfqq = NULL;
struct cfq_queue *cfqq = NULL;
if (!is_sync) {
if (!ioprio_valid(cic->ioprio)) {
struct task_struct *tsk = current;
ioprio = task_nice_ioprio(tsk);
ioprio_class = task_nice_ioclass(tsk);
}
async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
cfqq = *async_cfqq;
}
......
......@@ -198,7 +198,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
if (start + len > (i_size_read(bdev->bd_inode) >> 9))
return -EINVAL;
return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL);
return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL, false);
}
static int put_ushort(unsigned long arg, unsigned short val)
......
......@@ -184,12 +184,12 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
if (err)
/* The partition is unrecognized. So report I/O errors if there were any */
res = err;
if (!res)
strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE);
else if (warn_no_part)
strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE);
printk(KERN_INFO "%s", state->pp_buf);
if (res) {
if (warn_no_part)
strlcat(state->pp_buf,
" unable to read partition table\n", PAGE_SIZE);
printk(KERN_INFO "%s", state->pp_buf);
}
free_page((unsigned long)state->pp_buf);
free_partitions(state);
......
......@@ -1388,7 +1388,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
list_add_tail(&peer_req->w.list, &device->active_ee);
spin_unlock_irq(&device->resource->req_lock);
if (blkdev_issue_zeroout(device->ldev->backing_bdev,
sector, data_size >> 9, GFP_NOIO))
sector, data_size >> 9, GFP_NOIO, false))
peer_req->flags |= EE_WAS_ERROR;
drbd_endio_write_sec_final(peer_req);
return 0;
......
......@@ -423,7 +423,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev)
}
/* switch queue to TCQ mode; allocate tag map */
rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL);
rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO);
if (rc) {
blk_cleanup_queue(q);
put_disk(disk);
......
......@@ -2188,6 +2188,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
shost->tag_set.cmd_size = cmd_size;
shost->tag_set.numa_node = NUMA_NO_NODE;
shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
shost->tag_set.flags |=
BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
shost->tag_set.driver_data = shost;
return blk_mq_alloc_tag_set(&shost->tag_set);
......
......@@ -290,7 +290,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
if (!shost_use_blk_mq(sdev->host) &&
(shost->bqt || shost->hostt->use_blk_tags)) {
blk_queue_init_tags(sdev->request_queue,
sdev->host->cmd_per_lun, shost->bqt);
sdev->host->cmd_per_lun, shost->bqt,
shost->hostt->tag_alloc_policy);
}
scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun);
......
......@@ -147,6 +147,8 @@ enum {
BLK_MQ_F_SG_MERGE = 1 << 2,
BLK_MQ_F_SYSFS_UP = 1 << 3,
BLK_MQ_F_DEFER_ISSUE = 1 << 4,
BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
BLK_MQ_F_ALLOC_POLICY_BITS = 1,
BLK_MQ_S_STOPPED = 0,
BLK_MQ_S_TAG_ACTIVE = 1,
......@@ -155,6 +157,12 @@ enum {
BLK_MQ_CPU_WORK_BATCH = 8,
};
#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
void blk_mq_finish_init(struct request_queue *q);
......
......@@ -272,7 +272,11 @@ struct blk_queue_tag {
int max_depth; /* what we will send to device */
int real_max_depth; /* what the array can hold */
atomic_t refcnt; /* map can be shared */
int alloc_policy; /* tag allocation policy */
int next_tag; /* next tag */
};
#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
#define BLK_SCSI_MAX_CMDS (256)
#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
......@@ -1139,11 +1143,11 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
extern int blk_queue_start_tag(struct request_queue *, struct request *);
extern struct request *blk_queue_find_tag(struct request_queue *, int);
extern void blk_queue_end_tag(struct request_queue *, struct request *);
extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *);
extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int);
extern void blk_queue_free_tags(struct request_queue *);
extern int blk_queue_resize_tags(struct request_queue *, int);
extern void blk_queue_invalidate_tags(struct request_queue *);
extern struct blk_queue_tag *blk_init_tags(int);
extern struct blk_queue_tag *blk_init_tags(int, int);
extern void blk_free_tags(struct blk_queue_tag *);
static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
......@@ -1162,7 +1166,7 @@ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask);
sector_t nr_sects, gfp_t gfp_mask, bool discard);
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
{
......@@ -1176,7 +1180,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
return blkdev_issue_zeroout(sb->s_bdev,
block << (sb->s_blocksize_bits - 9),
nr_blocks << (sb->s_blocksize_bits - 9),
gfp_mask);
gfp_mask, true);
}
extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
......
......@@ -402,6 +402,9 @@ struct scsi_host_template {
*/
unsigned char present;
/* If use block layer to manage tags, this is tag allocation policy */
int tag_alloc_policy;
/*
* Let the block layer assigns tags to all commands.
*/
......
......@@ -66,7 +66,8 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
* devices on the shared host (for libata)
*/
if (!shost->bqt) {
shost->bqt = blk_init_tags(depth);
shost->bqt = blk_init_tags(depth,
shost->hostt->tag_alloc_policy);
if (!shost->bqt)
return -ENOMEM;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment