Commit 5c2e7a0a authored by Linus Torvalds

Merge tag 'for-linus-5.12b-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull more xen updates from Juergen Gross:

 - A small series for Xen event channels adding some sysfs nodes for per
   pv-device settings and statistics, and two fixes of theoretical
   problems.

 - two minor fixes (one for an unlikely error path, one for a comment).

* tag 'for-linus-5.12b-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen-front-pgdir-shbuf: don't record wrong grant handle upon error
  xen: Replace lkml.org links with lore
  xen/evtchn: use READ/WRITE_ONCE() for accessing ring indices
  xen/evtchn: use smp barriers for user event ring
  xen/events: add per-xenbus device event statistics and settings
parents d94d1400 53f131c2
What: /sys/devices/*/xenbus/event_channels
Date: February 2021
Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
Description:
Number of Xen event channels associated with a kernel based
paravirtualized device frontend or backend.
What: /sys/devices/*/xenbus/events
Date: February 2021
Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
Description:
Total number of Xen events received for a Xen pv device
frontend or backend.
What: /sys/devices/*/xenbus/jiffies_eoi_delayed
Date: February 2021
Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
Description:
Total time in jiffies by which the EOI of an interrupt for a
Xen pv device has been delayed in order to avoid stalls due to
event storms. A rising value is a first sign of a rogue
other end of the pv device.
What: /sys/devices/*/xenbus/spurious_events
Date: February 2021
Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
Description:
Number of events received for a Xen pv device which did not
require any action. Too many spurious events in a row will
trigger delayed EOI processing.
What: /sys/devices/*/xenbus/spurious_threshold
Date: February 2021
Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
Description:
Controls the tolerated number of consecutive spurious events
before delayed EOI processing is triggered for a Xen pv
device. Default is 1. This can be modified in case the other
end of the pv device is issuing spurious events on a regular
basis and is known not to be malicious. Raising
the value in such cases can improve pv device performance.
......@@ -323,6 +323,8 @@ static int xen_irq_info_evtchn_setup(unsigned irq,
ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
info->u.interdomain = dev;
if (dev)
atomic_inc(&dev->event_channels);
return ret;
}
......@@ -568,18 +570,28 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
return;
if (spurious) {
struct xenbus_device *dev = info->u.interdomain;
unsigned int threshold = 1;
if (dev && dev->spurious_threshold)
threshold = dev->spurious_threshold;
if ((1 << info->spurious_cnt) < (HZ << 2)) {
if (info->spurious_cnt != 0xFF)
info->spurious_cnt++;
}
if (info->spurious_cnt > 1) {
delay = 1 << (info->spurious_cnt - 2);
if (info->spurious_cnt > threshold) {
delay = 1 << (info->spurious_cnt - 1 - threshold);
if (delay > HZ)
delay = HZ;
if (!info->eoi_time)
info->eoi_cpu = smp_processor_id();
info->eoi_time = get_jiffies_64() + delay;
if (dev)
atomic_add(delay, &dev->jiffies_eoi_delayed);
}
if (dev)
atomic_inc(&dev->spurious_events);
} else {
info->spurious_cnt = 0;
}
......@@ -908,6 +920,7 @@ static void __unbind_from_irq(unsigned int irq)
if (VALID_EVTCHN(evtchn)) {
unsigned int cpu = cpu_from_irq(irq);
struct xenbus_device *dev;
xen_evtchn_close(evtchn);
......@@ -918,6 +931,11 @@ static void __unbind_from_irq(unsigned int irq)
case IRQT_IPI:
per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
break;
case IRQT_EVTCHN:
dev = info->u.interdomain;
if (dev)
atomic_dec(&dev->event_channels);
break;
default:
break;
}
......@@ -1581,6 +1599,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
{
int irq;
struct irq_info *info;
struct xenbus_device *dev;
irq = get_evtchn_to_irq(port);
if (irq == -1)
......@@ -1610,6 +1629,10 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
info = info_for_irq(irq);
dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
if (dev)
atomic_inc(&dev->events);
if (ctrl->defer_eoi) {
info->eoi_cpu = smp_processor_id();
info->irq_epoch = __this_cpu_read(irq_epoch);
......
......@@ -162,6 +162,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
{
struct user_evtchn *evtchn = data;
struct per_user_data *u = evtchn->user;
unsigned int prod, cons;
WARN(!evtchn->enabled,
"Interrupt for port %u, but apparently not enabled; per-user %p\n",
......@@ -171,10 +172,14 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
spin_lock(&u->ring_prod_lock);
if ((u->ring_prod - u->ring_cons) < u->ring_size) {
*evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
wmb(); /* Ensure ring contents visible */
if (u->ring_cons == u->ring_prod++) {
prod = READ_ONCE(u->ring_prod);
cons = READ_ONCE(u->ring_cons);
if ((prod - cons) < u->ring_size) {
*evtchn_ring_entry(u, prod) = evtchn->port;
smp_wmb(); /* Ensure ring contents visible */
WRITE_ONCE(u->ring_prod, prod + 1);
if (cons == prod) {
wake_up_interruptible(&u->evtchn_wait);
kill_fasync(&u->evtchn_async_queue,
SIGIO, POLL_IN);
......@@ -210,8 +215,8 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
if (u->ring_overflow)
goto unlock_out;
c = u->ring_cons;
p = u->ring_prod;
c = READ_ONCE(u->ring_cons);
p = READ_ONCE(u->ring_prod);
if (c != p)
break;
......@@ -221,7 +226,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
return -EAGAIN;
rc = wait_event_interruptible(u->evtchn_wait,
u->ring_cons != u->ring_prod);
READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod));
if (rc)
return rc;
}
......@@ -245,13 +250,13 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
}
rc = -EFAULT;
rmb(); /* Ensure that we see the port before we copy it. */
smp_rmb(); /* Ensure that we see the port before we copy it. */
if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
((bytes2 != 0) &&
copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
goto unlock_out;
u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
WRITE_ONCE(u->ring_cons, c + (bytes1 + bytes2) / sizeof(evtchn_port_t));
rc = bytes1 + bytes2;
unlock_out:
......@@ -552,7 +557,9 @@ static long evtchn_ioctl(struct file *file,
/* Initialise the ring to empty. Clear errors. */
mutex_lock(&u->ring_cons_mutex);
spin_lock_irq(&u->ring_prod_lock);
u->ring_cons = u->ring_prod = u->ring_overflow = 0;
WRITE_ONCE(u->ring_cons, 0);
WRITE_ONCE(u->ring_prod, 0);
u->ring_overflow = 0;
spin_unlock_irq(&u->ring_prod_lock);
mutex_unlock(&u->ring_cons_mutex);
rc = 0;
......@@ -595,7 +602,7 @@ static __poll_t evtchn_poll(struct file *file, poll_table *wait)
struct per_user_data *u = file->private_data;
poll_wait(file, &u->evtchn_wait, wait);
if (u->ring_cons != u->ring_prod)
if (READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod))
mask |= EPOLLIN | EPOLLRDNORM;
if (u->ring_overflow)
mask = EPOLLERR;
......
......@@ -3,7 +3,8 @@
* Copyright 2012 by Oracle Inc
* Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
*
* This code borrows ideas from https://lkml.org/lkml/2011/11/30/249
* This code borrows ideas from
* https://lore.kernel.org/lkml/1322673664-14642-6-git-send-email-konrad.wilk@oracle.com
* so many thanks go to Kevin Tian <kevin.tian@intel.com>
* and Yu Ke <ke.yu@intel.com>.
*/
......
......@@ -305,11 +305,18 @@ static int backend_map(struct xen_front_pgdir_shbuf *buf)
/* Save handles even if error, so we can unmap. */
for (cur_page = 0; cur_page < buf->num_pages; cur_page++) {
buf->backend_map_handles[cur_page] = map_ops[cur_page].handle;
if (unlikely(map_ops[cur_page].status != GNTST_okay))
if (likely(map_ops[cur_page].status == GNTST_okay)) {
buf->backend_map_handles[cur_page] =
map_ops[cur_page].handle;
} else {
buf->backend_map_handles[cur_page] =
INVALID_GRANT_HANDLE;
if (!ret)
ret = -ENXIO;
dev_err(&buf->xb_dev->dev,
"Failed to map page %d: %d\n",
cur_page, map_ops[cur_page].status);
}
}
if (ret) {
......
......@@ -206,6 +206,65 @@ void xenbus_otherend_changed(struct xenbus_watch *watch,
}
EXPORT_SYMBOL_GPL(xenbus_otherend_changed);
/*
 * Generate a read-only (0444) device attribute called <name> together with
 * its show handler. The handler prints the atomic_t member <name> of the
 * xenbus device as a signed decimal number followed by a newline.
 */
#define XENBUS_SHOW_STAT(name)						\
static ssize_t show_##name(struct device *_dev,				\
			   struct device_attribute *attr,		\
			   char *buf)					\
{									\
	struct xenbus_device *xdev = to_xenbus_device(_dev);		\
	int value = atomic_read(&xdev->name);				\
									\
	return sprintf(buf, "%d\n", value);				\
}									\
static DEVICE_ATTR(name, 0444, show_##name, NULL)

/* One attribute per event channel statistic kept in struct xenbus_device. */
XENBUS_SHOW_STAT(event_channels);
XENBUS_SHOW_STAT(events);
XENBUS_SHOW_STAT(spurious_events);
XENBUS_SHOW_STAT(jiffies_eoi_delayed);
/*
 * sysfs show handler for the "spurious_threshold" attribute.
 *
 * Writes the device's current spurious_threshold to buf as a decimal number
 * followed by a newline and returns the number of characters written.
 *
 * The field is an unsigned int and the store handler accepts the full
 * unsigned range, so it must be printed with %u; %d would show values above
 * INT_MAX as negative numbers.
 */
static ssize_t show_spurious_threshold(struct device *_dev,
				       struct device_attribute *attr,
				       char *buf)
{
	struct xenbus_device *dev = to_xenbus_device(_dev);

	return sprintf(buf, "%u\n", dev->spurious_threshold);
}
/*
 * sysfs store handler for the "spurious_threshold" attribute.
 *
 * Parses buf as an unsigned int (base 0, i.e. the radix is left to
 * kstrtouint() to derive from the input) and stores it in the device's
 * spurious_threshold.
 *
 * Returns count on success, or the error code reported by kstrtouint() if
 * the input could not be parsed; the stored value is left untouched then.
 */
static ssize_t set_spurious_threshold(struct device *_dev,
				      struct device_attribute *attr,
				      const char *buf, size_t count)
{
	struct xenbus_device *xdev = to_xenbus_device(_dev);
	unsigned int threshold;
	ssize_t err = kstrtouint(buf, 0, &threshold);

	if (err)
		return err;

	xdev->spurious_threshold = threshold;

	return count;
}
/*
 * "spurious_threshold" attribute, mode 0644 (world readable, owner
 * writable), backed by the show/set handlers named below.
 */
static DEVICE_ATTR(spurious_threshold, 0644, show_spurious_threshold,
set_spurious_threshold);
/*
 * NULL-terminated list of the per-device attributes: the four event channel
 * statistics followed by the spurious_threshold setting.
 */
static struct attribute *xenbus_attrs[] = {
&dev_attr_event_channels.attr,
&dev_attr_events.attr,
&dev_attr_spurious_events.attr,
&dev_attr_jiffies_eoi_delayed.attr,
&dev_attr_spurious_threshold.attr,
NULL
};
/*
 * Attribute group bundling xenbus_attrs. The group name "xenbus" makes the
 * files show up in a "xenbus" subdirectory of the device's sysfs directory.
 */
static const struct attribute_group xenbus_group = {
.name = "xenbus",
.attrs = xenbus_attrs,
};
int xenbus_dev_probe(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
......@@ -253,6 +312,11 @@ int xenbus_dev_probe(struct device *_dev)
return err;
}
dev->spurious_threshold = 1;
if (sysfs_create_group(&dev->dev.kobj, &xenbus_group))
dev_warn(&dev->dev, "sysfs_create_group on %s failed.\n",
dev->nodename);
return 0;
fail_put:
module_put(drv->driver.owner);
......@@ -269,6 +333,8 @@ int xenbus_dev_remove(struct device *_dev)
DPRINTK("%s", dev->nodename);
sysfs_remove_group(&dev->dev.kobj, &xenbus_group);
free_otherend_watch(dev);
if (drv->remove) {
......
......@@ -88,6 +88,13 @@ struct xenbus_device {
struct completion down;
struct work_struct work;
struct semaphore reclaim_sem;
/* Event channel based statistics and settings. */
atomic_t event_channels;
atomic_t events;
atomic_t spurious_events;
atomic_t jiffies_eoi_delayed;
unsigned int spurious_threshold;
};
static inline struct xenbus_device *to_xenbus_device(struct device *dev)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment