Commit 057d3ccf authored by Jens Axboe

Merge branch 'stable/for-jens-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen into for-linus

Pull Xen block driver fixes from Konrad:

"Fix for flushing out persistent pages at a deterministic rate"

* 'stable/for-jens-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/blkback: remove unused pers_gnts_lock from struct xen_blkif_ring
  xen/blkback: move persistent grants flags to bool
  xen/blkfront: reorder tests in xlblk_init()
  xen/blkfront: cleanup stale persistent grants
  xen/blkback: don't keep persistent grants too long
Parents: 38cfb5a4 6f2f39ad
Documentation/ABI/testing/sysfs-driver-xen-blkback

@@ -15,3 +15,13 @@ Description:
                 blkback. If the frontend tries to use more than
                 max_persistent_grants, the LRU kicks in and starts
                 removing 5% of max_persistent_grants every 100ms.
+
+What:           /sys/module/xen_blkback/parameters/persistent_grant_unused_seconds
+Date:           August 2018
+KernelVersion:  4.19
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                How long a persistent grant is allowed to remain
+                allocated without being in use. The time is in
+                seconds, 0 means indefinitely long.
+                The default is 60 seconds.
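
As a side note for anyone tuning these two parameters, here is a rough, self-contained C sketch of the arithmetic they describe. It is illustrative only and not part of this commit: the example grant count is made up, the clock is in plain seconds rather than jiffies, and the 5%-per-100ms figure is taken from the paragraph above (the backend's actual check is the persistent_gnt_timeout() helper added further down in this diff).

    /*
     * Illustration only -- not part of this commit.  Models the documented
     * purge behaviour: each 100ms LRU pass may reclaim up to 5% of
     * max_persistent_grants, and grants idle longer than
     * persistent_grant_unused_seconds become reclaim candidates.
     * The grant count below is a made-up example value.
     */
    #include <stdbool.h>
    #include <stdio.h>

    static unsigned int max_persistent_grants = 1056;         /* example  */
    static unsigned int persistent_grant_unused_seconds = 60; /* default  */

    static bool grant_expired(unsigned long now_sec, unsigned long last_used_sec)
    {
            /* 0 disables the timeout, mirroring the documented semantics. */
            return persistent_grant_unused_seconds &&
                   now_sec - last_used_sec >= persistent_grant_unused_seconds;
    }

    int main(void)
    {
            /* At most 5% of the limit is reclaimed per 100ms LRU pass. */
            unsigned int per_pass = (max_persistent_grants / 100) * 5;
            unsigned int passes = (max_persistent_grants + per_pass - 1) / per_pass;

            printf("up to %u grants reclaimed per 100ms LRU pass\n", per_pass);
            printf("reclaiming %u grants at that rate takes roughly %.1f seconds\n",
                   max_persistent_grants, passes * 0.1);
            printf("a grant idle since t=30s is %sexpired at t=120s\n",
                   grant_expired(120, 30) ? "" : "not ");
            return 0;
    }
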
drivers/block/xen-blkback/blkback.c

@@ -83,6 +83,18 @@ module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
 MODULE_PARM_DESC(max_persistent_grants,
                  "Maximum number of grants to map persistently");
 
+/*
+ * How long a persistent grant is allowed to remain allocated without being in
+ * use. The time is in seconds, 0 means indefinitely long.
+ */
+static unsigned int xen_blkif_pgrant_timeout = 60;
+module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout,
+                   uint, 0644);
+MODULE_PARM_DESC(persistent_grant_unused_seconds,
+                 "Time in seconds an unused persistent grant is allowed to "
+                 "remain allocated. Default is 60, 0 means unlimited.");
+
 /*
  * Maximum number of rings/queues blkback supports, allow as many queues as there
  * are CPUs if user has not specified a value.

@@ -123,6 +135,13 @@ module_param(log_stats, int, 0644);
 /* Number of free pages to remove on each call to gnttab_free_pages */
 #define NUM_BATCH_FREE_PAGES 10
 
+static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt)
+{
+        return xen_blkif_pgrant_timeout &&
+               (jiffies - persistent_gnt->last_used >=
+                HZ * xen_blkif_pgrant_timeout);
+}
+
 static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
 {
         unsigned long flags;

@@ -236,8 +255,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring,
                 }
         }
 
-        bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
-        set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+        persistent_gnt->active = true;
         /* Add new node and rebalance tree. */
         rb_link_node(&(persistent_gnt->node), parent, new);
         rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);

@@ -261,11 +279,11 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
                 else if (gref > data->gnt)
                         node = node->rb_right;
                 else {
-                        if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
+                        if (data->active) {
                                 pr_alert_ratelimited("requesting a grant already in use\n");
                                 return NULL;
                         }
-                        set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
+                        data->active = true;
                         atomic_inc(&ring->persistent_gnt_in_use);
                         return data;
                 }

@@ -276,10 +294,10 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
 static void put_persistent_gnt(struct xen_blkif_ring *ring,
                                struct persistent_gnt *persistent_gnt)
 {
-        if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+        if (!persistent_gnt->active)
                 pr_alert_ratelimited("freeing a grant already unused\n");
-        set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
-        clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+        persistent_gnt->last_used = jiffies;
+        persistent_gnt->active = false;
         atomic_dec(&ring->persistent_gnt_in_use);
 }

@@ -371,26 +389,26 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
         struct persistent_gnt *persistent_gnt;
         struct rb_node *n;
         unsigned int num_clean, total;
-        bool scan_used = false, clean_used = false;
+        bool scan_used = false;
         struct rb_root *root;
 
-        if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
-            (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
-            !ring->blkif->vbd.overflow_max_grants)) {
-                goto out;
-        }
-
         if (work_busy(&ring->persistent_purge_work)) {
                 pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
                 goto out;
         }
 
-        num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
-        num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
-        num_clean = min(ring->persistent_gnt_c, num_clean);
-        if ((num_clean == 0) ||
-            (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
-                goto out;
+        if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
+            (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+            !ring->blkif->vbd.overflow_max_grants)) {
+                num_clean = 0;
+        } else {
+                num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
+                num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants +
+                            num_clean;
+                num_clean = min(ring->persistent_gnt_c, num_clean);
+                pr_debug("Going to purge at least %u persistent grants\n",
+                         num_clean);
+        }
 
         /*
          * At this point, we can assure that there will be no calls

@@ -401,9 +419,7 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
          * number of grants.
          */
-        total = num_clean;
-
-        pr_debug("Going to purge %u persistent grants\n", num_clean);
+        total = 0;
 
         BUG_ON(!list_empty(&ring->persistent_purge_list));
         root = &ring->persistent_gnts;

@@ -412,46 +428,37 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
                 BUG_ON(persistent_gnt->handle ==
                         BLKBACK_INVALID_HANDLE);
-                if (clean_used) {
-                        clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+                if (persistent_gnt->active)
                         continue;
-                }
-
-                if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+                if (!scan_used && !persistent_gnt_timeout(persistent_gnt))
                         continue;
-                if (!scan_used &&
-                    (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
+                if (scan_used && total >= num_clean)
                         continue;
 
                 rb_erase(&persistent_gnt->node, root);
                 list_add(&persistent_gnt->remove_node,
                          &ring->persistent_purge_list);
-                if (--num_clean == 0)
-                        goto finished;
+                total++;
         }
 
         /*
-         * If we get here it means we also need to start cleaning
+         * Check whether we also need to start cleaning
          * grants that were used since last purge in order to cope
          * with the requested num
          */
-        if (!scan_used && !clean_used) {
-                pr_debug("Still missing %u purged frames\n", num_clean);
+        if (!scan_used && total < num_clean) {
+                pr_debug("Still missing %u purged frames\n", num_clean - total);
                 scan_used = true;
                 goto purge_list;
         }
-finished:
-        if (!clean_used) {
-                pr_debug("Finished scanning for grants to clean, removing used flag\n");
-                clean_used = true;
-                goto purge_list;
-        }
 
-        ring->persistent_gnt_c -= (total - num_clean);
-        ring->blkif->vbd.overflow_max_grants = 0;
-
-        /* We can defer this work */
-        schedule_work(&ring->persistent_purge_work);
-        pr_debug("Purged %u/%u\n", (total - num_clean), total);
+        if (total) {
+                ring->persistent_gnt_c -= total;
+                ring->blkif->vbd.overflow_max_grants = 0;
+
+                /* We can defer this work */
+                schedule_work(&ring->persistent_purge_work);
+                pr_debug("Purged %u/%u\n", num_clean, total);
+        }
 
 out:
         return;
drivers/block/xen-blkback/common.h

@@ -233,16 +233,6 @@ struct xen_vbd {
 struct backend_info;
 
-/* Number of available flags */
-#define PERSISTENT_GNT_FLAGS_SIZE      2
-/* This persistent grant is currently in use */
-#define PERSISTENT_GNT_ACTIVE          0
-/*
- * This persistent grant has been used, this flag is set when we remove the
- * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently.
- */
-#define PERSISTENT_GNT_WAS_ACTIVE      1
-
 /* Number of requests that we can fit in a ring */
 #define XEN_BLKIF_REQS_PER_PAGE 32

@@ -250,7 +240,8 @@ struct persistent_gnt {
         struct page       *page;
         grant_ref_t       gnt;
         grant_handle_t    handle;
-        DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
+        unsigned long     last_used;
+        bool              active;
         struct rb_node    node;
         struct list_head  remove_node;
 };

@@ -278,7 +269,6 @@ struct xen_blkif_ring {
         wait_queue_head_t       pending_free_wq;
 
         /* Tree to store persistent grants. */
-        spinlock_t              pers_gnts_lock;
         struct rb_root          persistent_gnts;
         unsigned int            persistent_gnt_c;
         atomic_t                persistent_gnt_in_use;
drivers/block/xen-blkfront.c

@@ -46,6 +46,7 @@
 #include <linux/scatterlist.h>
 #include <linux/bitmap.h>
 #include <linux/list.h>
+#include <linux/workqueue.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>

@@ -121,6 +122,8 @@ static inline struct blkif_req *blkif_req(struct request *rq)
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;
+static struct delayed_work blkfront_work;
+static LIST_HEAD(info_list);
 
 /*
  * Maximum number of segments in indirect requests, the actual value used by

@@ -216,6 +219,7 @@ struct blkfront_info
         /* Save uncomplete reqs and bios for migration. */
         struct list_head requests;
         struct bio_list bio_list;
+        struct list_head info_list;
 };
 
 static unsigned int nr_minors;

@@ -1759,6 +1763,12 @@ static int write_per_ring_nodes(struct xenbus_transaction xbt,
         return err;
 }
 
+static void free_info(struct blkfront_info *info)
+{
+        list_del(&info->info_list);
+        kfree(info);
+}
+
 /* Common code used when first setting up, and when resuming. */
 static int talk_to_blkback(struct xenbus_device *dev,
                            struct blkfront_info *info)

@@ -1880,7 +1890,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
  destroy_blkring:
         blkif_free(info, 0);
 
-        kfree(info);
+        mutex_lock(&blkfront_mutex);
+        free_info(info);
+        mutex_unlock(&blkfront_mutex);
+
         dev_set_drvdata(&dev->dev, NULL);
 
         return err;

@@ -1991,6 +2004,10 @@ static int blkfront_probe(struct xenbus_device *dev,
         info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
         dev_set_drvdata(&dev->dev, info);
 
+        mutex_lock(&blkfront_mutex);
+        list_add(&info->info_list, &info_list);
+        mutex_unlock(&blkfront_mutex);
+
         return 0;
 }

@@ -2301,6 +2318,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
         if (indirect_segments <= BLKIF_MAX_SEGMENTS_PER_REQUEST)
                 indirect_segments = 0;
         info->max_indirect_segments = indirect_segments;
+
+        if (info->feature_persistent) {
+                mutex_lock(&blkfront_mutex);
+                schedule_delayed_work(&blkfront_work, HZ * 10);
+                mutex_unlock(&blkfront_mutex);
+        }
 }
 
 /*

@@ -2482,7 +2505,9 @@ static int blkfront_remove(struct xenbus_device *xbdev)
         mutex_unlock(&info->mutex);
 
         if (!bdev) {
-                kfree(info);
+                mutex_lock(&blkfront_mutex);
+                free_info(info);
+                mutex_unlock(&blkfront_mutex);
                 return 0;
         }

@@ -2502,7 +2527,9 @@ static int blkfront_remove(struct xenbus_device *xbdev)
         if (info && !bdev->bd_openers) {
                 xlvbd_release_gendisk(info);
                 disk->private_data = NULL;
-                kfree(info);
+                mutex_lock(&blkfront_mutex);
+                free_info(info);
+                mutex_unlock(&blkfront_mutex);
         }
 
         mutex_unlock(&bdev->bd_mutex);

@@ -2585,7 +2612,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode)
                 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
                 xlvbd_release_gendisk(info);
                 disk->private_data = NULL;
-                kfree(info);
+                free_info(info);
         }
 
 out:

@@ -2618,6 +2645,61 @@ static struct xenbus_driver blkfront_driver = {
         .is_ready = blkfront_is_ready,
 };
 
+static void purge_persistent_grants(struct blkfront_info *info)
+{
+        unsigned int i;
+        unsigned long flags;
+
+        for (i = 0; i < info->nr_rings; i++) {
+                struct blkfront_ring_info *rinfo = &info->rinfo[i];
+                struct grant *gnt_list_entry, *tmp;
+
+                spin_lock_irqsave(&rinfo->ring_lock, flags);
+
+                if (rinfo->persistent_gnts_c == 0) {
+                        spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+                        continue;
+                }
+
+                list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
+                                         node) {
+                        if (gnt_list_entry->gref == GRANT_INVALID_REF ||
+                            gnttab_query_foreign_access(gnt_list_entry->gref))
+                                continue;
+
+                        list_del(&gnt_list_entry->node);
+                        gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
+                        rinfo->persistent_gnts_c--;
+                        __free_page(gnt_list_entry->page);
+                        kfree(gnt_list_entry);
+                }
+
+                spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+        }
+}
+
+static void blkfront_delay_work(struct work_struct *work)
+{
+        struct blkfront_info *info;
+        bool need_schedule_work = false;
+
+        mutex_lock(&blkfront_mutex);
+
+        list_for_each_entry(info, &info_list, info_list) {
+                if (info->feature_persistent) {
+                        need_schedule_work = true;
+                        mutex_lock(&info->mutex);
+                        purge_persistent_grants(info);
+                        mutex_unlock(&info->mutex);
+                }
+        }
+
+        if (need_schedule_work)
+                schedule_delayed_work(&blkfront_work, HZ * 10);
+
+        mutex_unlock(&blkfront_mutex);
+}
+
 static int __init xlblk_init(void)
 {
         int ret;

@@ -2626,6 +2708,15 @@ static int __init xlblk_init(void)
         if (!xen_domain())
                 return -ENODEV;
 
+        if (!xen_has_pv_disk_devices())
+                return -ENODEV;
+
+        if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
+                pr_warn("xen_blk: can't get major %d with name %s\n",
+                        XENVBD_MAJOR, DEV_NAME);
+                return -ENODEV;
+        }
+
         if (xen_blkif_max_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
                 xen_blkif_max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;

@@ -2641,14 +2732,7 @@ static int __init xlblk_init(void)
                 xen_blkif_max_queues = nr_cpus;
         }
 
-        if (!xen_has_pv_disk_devices())
-                return -ENODEV;
-
-        if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
-                printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
-                       XENVBD_MAJOR, DEV_NAME);
-                return -ENODEV;
-        }
+        INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work);
 
         ret = xenbus_register_frontend(&blkfront_driver);
         if (ret) {

@@ -2663,6 +2747,8 @@ module_init(xlblk_init);
 static void __exit xlblk_exit(void)
 {
+        cancel_delayed_work_sync(&blkfront_work);
+
         xenbus_unregister_driver(&blkfront_driver);
         unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
         kfree(minors);