Commit 340611ab authored by David S. Miller's avatar David S. Miller

Merge branch 'xen_split_event_channels'

Wei Liu says:

====================
This series adds a new feature called split event channels.

In the original implementation, only one event channel is setup between
frontend and backend. This is not ideal as TX notification interferes with RX
notification. Using dedicated event channels for TX and RX solves this issue.

Changes since V2:
* feature_split_event_channels -> separate_tx_rx_irq
* make separate_tx_rx_irq bool
* document this feature in header file
* don't report fatal if writing feature to xenstore fails
* frontend will fall back to single event channel if new feature setup fails

Changes since V1:
* change subject lines of commits to be more specific
* add parameter feature_split_event_channels for xen-netback
* remove two dev_info
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 88924753 a5560a6c
......@@ -57,8 +57,12 @@ struct xenvif {
u8 fe_dev_addr[6];
/* Physical parameters of the comms window. */
unsigned int irq;
/* When feature-split-event-channels = 0, tx_irq = rx_irq. */
unsigned int tx_irq;
unsigned int rx_irq;
/* Only used when feature-split-event-channels = 1 */
char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */
char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
/* List of frontends to notify after a batch of frames sent. */
struct list_head notify_list;
......@@ -113,7 +117,8 @@ struct xenvif *xenvif_alloc(struct device *parent,
unsigned int handle);
int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn);
unsigned long rx_ring_ref, unsigned int tx_evtchn,
unsigned int rx_evtchn);
void xenvif_disconnect(struct xenvif *vif);
void xenvif_get(struct xenvif *vif);
......@@ -158,4 +163,6 @@ void xenvif_carrier_off(struct xenvif *vif);
/* Returns number of ring slots required to send an skb to the frontend */
unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
extern bool separate_tx_rx_irq;
#endif /* __XEN_NETBACK__COMMON_H__ */
......@@ -60,21 +60,39 @@ static int xenvif_rx_schedulable(struct xenvif *vif)
return xenvif_schedulable(vif) && !xen_netbk_rx_ring_full(vif);
}
static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
{
struct xenvif *vif = dev_id;
if (vif->netbk == NULL)
return IRQ_NONE;
return IRQ_HANDLED;
xen_netbk_schedule_xenvif(vif);
return IRQ_HANDLED;
}
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
struct xenvif *vif = dev_id;
if (vif->netbk == NULL)
return IRQ_HANDLED;
if (xenvif_rx_schedulable(vif))
netif_wake_queue(vif->dev);
return IRQ_HANDLED;
}
static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
xenvif_tx_interrupt(irq, dev_id);
xenvif_rx_interrupt(irq, dev_id);
return IRQ_HANDLED;
}
static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
......@@ -125,13 +143,17 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
static void xenvif_up(struct xenvif *vif)
{
xen_netbk_add_xenvif(vif);
enable_irq(vif->irq);
enable_irq(vif->tx_irq);
if (vif->tx_irq != vif->rx_irq)
enable_irq(vif->rx_irq);
xen_netbk_check_rx_xenvif(vif);
}
static void xenvif_down(struct xenvif *vif)
{
disable_irq(vif->irq);
disable_irq(vif->tx_irq);
if (vif->tx_irq != vif->rx_irq)
disable_irq(vif->rx_irq);
del_timer_sync(&vif->credit_timeout);
xen_netbk_deschedule_xenvif(vif);
xen_netbk_remove_xenvif(vif);
......@@ -308,12 +330,13 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
}
int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn)
unsigned long rx_ring_ref, unsigned int tx_evtchn,
unsigned int rx_evtchn)
{
int err = -ENOMEM;
/* Already connected through? */
if (vif->irq)
if (vif->tx_irq)
return 0;
__module_get(THIS_MODULE);
......@@ -322,13 +345,37 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
if (err < 0)
goto err;
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
err = bind_interdomain_evtchn_to_irqhandler(
vif->domid, evtchn, xenvif_interrupt, 0,
vif->domid, tx_evtchn, xenvif_interrupt, 0,
vif->dev->name, vif);
if (err < 0)
goto err_unmap;
vif->irq = err;
disable_irq(vif->irq);
vif->tx_irq = vif->rx_irq = err;
disable_irq(vif->tx_irq);
} else {
/* feature-split-event-channels == 1 */
snprintf(vif->tx_irq_name, sizeof(vif->tx_irq_name),
"%s-tx", vif->dev->name);
err = bind_interdomain_evtchn_to_irqhandler(
vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
vif->tx_irq_name, vif);
if (err < 0)
goto err_unmap;
vif->tx_irq = err;
disable_irq(vif->tx_irq);
snprintf(vif->rx_irq_name, sizeof(vif->rx_irq_name),
"%s-rx", vif->dev->name);
err = bind_interdomain_evtchn_to_irqhandler(
vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
vif->rx_irq_name, vif);
if (err < 0)
goto err_tx_unbind;
vif->rx_irq = err;
disable_irq(vif->rx_irq);
}
xenvif_get(vif);
......@@ -342,6 +389,9 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
rtnl_unlock();
return 0;
err_tx_unbind:
unbind_from_irqhandler(vif->tx_irq, vif);
vif->tx_irq = 0;
err_unmap:
xen_netbk_unmap_frontend_rings(vif);
err:
......@@ -375,8 +425,13 @@ void xenvif_disconnect(struct xenvif *vif)
atomic_dec(&vif->refcnt);
wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0);
if (vif->irq) {
unbind_from_irqhandler(vif->irq, vif);
if (vif->tx_irq) {
if (vif->tx_irq == vif->rx_irq)
unbind_from_irqhandler(vif->tx_irq, vif);
else {
unbind_from_irqhandler(vif->tx_irq, vif);
unbind_from_irqhandler(vif->rx_irq, vif);
}
/* vif->irq is valid, we had a module_get in
* xenvif_connect.
*/
......
......@@ -47,6 +47,13 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
/* Provide an option to disable split event channels at load time as
* event channels are limited resource. Split event channels are
* enabled by default.
*/
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
/*
* This is the maximum slots a skb can have. If a guest sends a skb
* which exceeds this limit it is considered malicious.
......@@ -662,7 +669,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
{
struct xenvif *vif = NULL, *tmp;
s8 status;
u16 irq, flags;
u16 flags;
struct xen_netif_rx_response *resp;
struct sk_buff_head rxq;
struct sk_buff *skb;
......@@ -771,7 +778,6 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
sco->meta_slots_used);
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
irq = vif->irq;
if (ret && list_empty(&vif->notify_list))
list_add_tail(&vif->notify_list, &notify);
......@@ -783,7 +789,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
}
list_for_each_entry_safe(vif, tmp, &notify, notify_list) {
notify_remote_via_irq(vif->irq);
notify_remote_via_irq(vif->rx_irq);
list_del_init(&vif->notify_list);
}
......@@ -1762,7 +1768,7 @@ static void make_tx_response(struct xenvif *vif,
vif->tx.rsp_prod_pvt = ++i;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
if (notify)
notify_remote_via_irq(vif->irq);
notify_remote_via_irq(vif->tx_irq);
}
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
......
......@@ -122,6 +122,16 @@ static int netback_probe(struct xenbus_device *dev,
goto fail;
}
/*
* Split event channels support, this is optional so it is not
* put inside the above loop.
*/
err = xenbus_printf(XBT_NIL, dev->nodename,
"feature-split-event-channels",
"%u", separate_tx_rx_irq);
if (err)
pr_debug("Error writing feature-split-event-channels");
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto fail;
......@@ -393,21 +403,36 @@ static int connect_rings(struct backend_info *be)
struct xenvif *vif = be->vif;
struct xenbus_device *dev = be->dev;
unsigned long tx_ring_ref, rx_ring_ref;
unsigned int evtchn, rx_copy;
unsigned int tx_evtchn, rx_evtchn, rx_copy;
int err;
int val;
err = xenbus_gather(XBT_NIL, dev->otherend,
"tx-ring-ref", "%lu", &tx_ring_ref,
"rx-ring-ref", "%lu", &rx_ring_ref,
"event-channel", "%u", &evtchn, NULL);
"rx-ring-ref", "%lu", &rx_ring_ref, NULL);
if (err) {
xenbus_dev_fatal(dev, err,
"reading %s/ring-ref and event-channel",
"reading %s/ring-ref",
dev->otherend);
return err;
}
/* Try split event channels first, then single event channel. */
err = xenbus_gather(XBT_NIL, dev->otherend,
"event-channel-tx", "%u", &tx_evtchn,
"event-channel-rx", "%u", &rx_evtchn, NULL);
if (err < 0) {
err = xenbus_scanf(XBT_NIL, dev->otherend,
"event-channel", "%u", &tx_evtchn);
if (err < 0) {
xenbus_dev_fatal(dev, err,
"reading %s/event-channel(-tx/rx)",
dev->otherend);
return err;
}
rx_evtchn = tx_evtchn;
}
err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
&rx_copy);
if (err == -ENOENT) {
......@@ -454,11 +479,13 @@ static int connect_rings(struct backend_info *be)
vif->csum = !val;
/* Map the shared frame, irq etc. */
err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, evtchn);
err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref,
tx_evtchn, rx_evtchn);
if (err) {
xenbus_dev_fatal(dev, err,
"mapping shared-frames %lu/%lu port %u",
tx_ring_ref, rx_ring_ref, evtchn);
"mapping shared-frames %lu/%lu port tx %u rx %u",
tx_ring_ref, rx_ring_ref,
tx_evtchn, rx_evtchn);
return err;
}
return 0;
......
......@@ -85,7 +85,15 @@ struct netfront_info {
struct napi_struct napi;
unsigned int evtchn;
/* Split event channels support, tx_* == rx_* when using
* single event channel.
*/
unsigned int tx_evtchn, rx_evtchn;
unsigned int tx_irq, rx_irq;
/* Only used when split event channels support is enabled */
char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */
char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
struct xenbus_device *xbdev;
spinlock_t tx_lock;
......@@ -330,7 +338,7 @@ static void xennet_alloc_rx_buffers(struct net_device *dev)
push:
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
if (notify)
notify_remote_via_irq(np->netdev->irq);
notify_remote_via_irq(np->rx_irq);
}
static int xennet_open(struct net_device *dev)
......@@ -623,7 +631,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
if (notify)
notify_remote_via_irq(np->netdev->irq);
notify_remote_via_irq(np->tx_irq);
u64_stats_update_begin(&stats->syncp);
stats->tx_bytes += skb->len;
......@@ -1254,23 +1262,35 @@ static int xennet_set_features(struct net_device *dev,
return 0;
}
static irqreturn_t xennet_interrupt(int irq, void *dev_id)
static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
{
struct net_device *dev = dev_id;
struct netfront_info *np = netdev_priv(dev);
struct netfront_info *np = dev_id;
struct net_device *dev = np->netdev;
unsigned long flags;
spin_lock_irqsave(&np->tx_lock, flags);
if (likely(netif_carrier_ok(dev))) {
xennet_tx_buf_gc(dev);
/* Under tx_lock: protects access to rx shared-ring indexes. */
if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
spin_unlock_irqrestore(&np->tx_lock, flags);
return IRQ_HANDLED;
}
static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
{
struct netfront_info *np = dev_id;
struct net_device *dev = np->netdev;
if (likely(netif_carrier_ok(dev) &&
RING_HAS_UNCONSUMED_RESPONSES(&np->rx)))
napi_schedule(&np->napi);
}
spin_unlock_irqrestore(&np->tx_lock, flags);
return IRQ_HANDLED;
}
static irqreturn_t xennet_interrupt(int irq, void *dev_id)
{
xennet_tx_interrupt(irq, dev_id);
xennet_rx_interrupt(irq, dev_id);
return IRQ_HANDLED;
}
......@@ -1451,9 +1471,14 @@ static void xennet_disconnect_backend(struct netfront_info *info)
spin_unlock_irq(&info->tx_lock);
spin_unlock_bh(&info->rx_lock);
if (info->netdev->irq)
unbind_from_irqhandler(info->netdev->irq, info->netdev);
info->evtchn = info->netdev->irq = 0;
if (info->tx_irq && (info->tx_irq == info->rx_irq))
unbind_from_irqhandler(info->tx_irq, info);
if (info->tx_irq && (info->tx_irq != info->rx_irq)) {
unbind_from_irqhandler(info->tx_irq, info);
unbind_from_irqhandler(info->rx_irq, info);
}
info->tx_evtchn = info->rx_evtchn = 0;
info->tx_irq = info->rx_irq = 0;
/* End access and free the pages */
xennet_end_access(info->tx_ring_ref, info->tx.sring);
......@@ -1503,12 +1528,82 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
return 0;
}
static int setup_netfront_single(struct netfront_info *info)
{
int err;
err = xenbus_alloc_evtchn(info->xbdev, &info->tx_evtchn);
if (err < 0)
goto fail;
err = bind_evtchn_to_irqhandler(info->tx_evtchn,
xennet_interrupt,
0, info->netdev->name, info);
if (err < 0)
goto bind_fail;
info->rx_evtchn = info->tx_evtchn;
info->rx_irq = info->tx_irq = err;
return 0;
bind_fail:
xenbus_free_evtchn(info->xbdev, info->tx_evtchn);
info->tx_evtchn = 0;
fail:
return err;
}
static int setup_netfront_split(struct netfront_info *info)
{
int err;
err = xenbus_alloc_evtchn(info->xbdev, &info->tx_evtchn);
if (err < 0)
goto fail;
err = xenbus_alloc_evtchn(info->xbdev, &info->rx_evtchn);
if (err < 0)
goto alloc_rx_evtchn_fail;
snprintf(info->tx_irq_name, sizeof(info->tx_irq_name),
"%s-tx", info->netdev->name);
err = bind_evtchn_to_irqhandler(info->tx_evtchn,
xennet_tx_interrupt,
0, info->tx_irq_name, info);
if (err < 0)
goto bind_tx_fail;
info->tx_irq = err;
snprintf(info->rx_irq_name, sizeof(info->rx_irq_name),
"%s-rx", info->netdev->name);
err = bind_evtchn_to_irqhandler(info->rx_evtchn,
xennet_rx_interrupt,
0, info->rx_irq_name, info);
if (err < 0)
goto bind_rx_fail;
info->rx_irq = err;
return 0;
bind_rx_fail:
unbind_from_irqhandler(info->tx_irq, info);
info->tx_irq = 0;
bind_tx_fail:
xenbus_free_evtchn(info->xbdev, info->rx_evtchn);
info->rx_evtchn = 0;
alloc_rx_evtchn_fail:
xenbus_free_evtchn(info->xbdev, info->tx_evtchn);
info->tx_evtchn = 0;
fail:
return err;
}
static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
{
struct xen_netif_tx_sring *txs;
struct xen_netif_rx_sring *rxs;
int err;
struct net_device *netdev = info->netdev;
unsigned int feature_split_evtchn;
info->tx_ring_ref = GRANT_INVALID_REF;
info->rx_ring_ref = GRANT_INVALID_REF;
......@@ -1516,6 +1611,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
info->tx.sring = NULL;
netdev->irq = 0;
err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
"feature-split-event-channels", "%u",
&feature_split_evtchn);
if (err < 0)
feature_split_evtchn = 0;
err = xen_net_read_mac(dev, netdev->dev_addr);
if (err) {
xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
......@@ -1550,22 +1651,23 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
goto grant_rx_ring_fail;
info->rx_ring_ref = err;
err = xenbus_alloc_evtchn(dev, &info->evtchn);
if (feature_split_evtchn)
err = setup_netfront_split(info);
/* setup single event channel if
* a) feature-split-event-channels == 0
* b) feature-split-event-channels == 1 but failed to setup
*/
if (!feature_split_evtchn || (feature_split_evtchn && err))
err = setup_netfront_single(info);
if (err)
goto alloc_evtchn_fail;
err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt,
0, netdev->name, netdev);
if (err < 0)
goto bind_fail;
netdev->irq = err;
return 0;
/* If we fail to setup netfront, it is safe to just revoke access to
* granted pages because backend is not accessing it at this point.
*/
bind_fail:
xenbus_free_evtchn(dev, info->evtchn);
alloc_evtchn_fail:
gnttab_end_foreign_access_ref(info->rx_ring_ref, 0);
grant_rx_ring_fail:
......@@ -1610,12 +1712,28 @@ static int talk_to_netback(struct xenbus_device *dev,
message = "writing rx ring-ref";
goto abort_transaction;
}
if (info->tx_evtchn == info->rx_evtchn) {
err = xenbus_printf(xbt, dev->nodename,
"event-channel", "%u", info->evtchn);
"event-channel", "%u", info->tx_evtchn);
if (err) {
message = "writing event-channel";
goto abort_transaction;
}
} else {
err = xenbus_printf(xbt, dev->nodename,
"event-channel-tx", "%u", info->tx_evtchn);
if (err) {
message = "writing event-channel-tx";
goto abort_transaction;
}
err = xenbus_printf(xbt, dev->nodename,
"event-channel-rx", "%u", info->rx_evtchn);
if (err) {
message = "writing event-channel-rx";
goto abort_transaction;
}
}
err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
1);
......@@ -1727,7 +1845,9 @@ static int xennet_connect(struct net_device *dev)
* packets.
*/
netif_carrier_on(np->netdev);
notify_remote_via_irq(np->netdev->irq);
notify_remote_via_irq(np->tx_irq);
if (np->tx_irq != np->rx_irq)
notify_remote_via_irq(np->rx_irq);
xennet_tx_buf_gc(dev);
xennet_alloc_rx_buffers(dev);
......
......@@ -38,6 +38,18 @@
* that it cannot safely queue packets (as it may not be kicked to send them).
*/
/*
* "feature-split-event-channels" is introduced to separate guest TX
* and RX notificaion. Backend either doesn't support this feature or
* advertise it via xenstore as 0 (disabled) or 1 (enabled).
*
* To make use of this feature, frontend should allocate two event
* channels for TX and RX, advertise them to backend as
* "event-channel-tx" and "event-channel-rx" respectively. If frontend
* doesn't want to use this feature, it just writes "event-channel"
* node as before.
*/
/*
* This is the 'wire' format for packets:
* Request 1: xen_netif_tx_request -- XEN_NETTXF_* (any flags)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment