Commit 567f4872, authored by Arthur Kepner and committed by David S. Miller

[NET]: Lockless loopback patch (version 2).

parent e2580c9e
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/tcp.h> #include <linux/tcp.h>
static struct net_device_stats *loopback_stats;
#define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16) #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)
...@@ -123,7 +124,6 @@ static void emulate_large_send_offload(struct sk_buff *skb) ...@@ -123,7 +124,6 @@ static void emulate_large_send_offload(struct sk_buff *skb)
*/ */
static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
{ {
struct net_device_stats *stats = dev->priv;
skb_orphan(skb); skb_orphan(skb);
...@@ -142,11 +142,12 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -142,11 +142,12 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
} }
dev->last_rx = jiffies; dev->last_rx = jiffies;
if (likely(stats)) { if (likely(loopback_stats)) {
stats->rx_bytes+=skb->len; get_cpu_ptr(loopback_stats)->rx_bytes += skb->len;
stats->tx_bytes+=skb->len; get_cpu_ptr(loopback_stats)->tx_bytes += skb->len;
stats->rx_packets++; get_cpu_ptr(loopback_stats)->rx_packets++;
stats->tx_packets++; get_cpu_ptr(loopback_stats)->tx_packets++;
put_cpu_ptr(loopback_stats);
} }
netif_rx(skb); netif_rx(skb);
...@@ -156,7 +157,28 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -156,7 +157,28 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
static struct net_device_stats *get_stats(struct net_device *dev) static struct net_device_stats *get_stats(struct net_device *dev)
{ {
return (struct net_device_stats *)dev->priv; struct net_device_stats *stats = dev->priv;
int i;
if (!stats) {
return NULL;
}
memset(stats, 0, sizeof(struct net_device_stats));
if (!loopback_stats) {
return stats;
}
for (i=0; i < NR_CPUS; i++) {
if (!cpu_possible(i))
continue;
stats->rx_bytes += per_cpu_ptr(loopback_stats, i)->rx_bytes;
stats->tx_bytes += per_cpu_ptr(loopback_stats, i)->tx_bytes;
stats->rx_packets += per_cpu_ptr(loopback_stats, i)->rx_packets;
stats->tx_packets += per_cpu_ptr(loopback_stats, i)->tx_packets;
}
return stats;
} }
struct net_device loopback_dev = { struct net_device loopback_dev = {
...@@ -173,7 +195,8 @@ struct net_device loopback_dev = { ...@@ -173,7 +195,8 @@ struct net_device loopback_dev = {
.rebuild_header = eth_rebuild_header, .rebuild_header = eth_rebuild_header,
.flags = IFF_LOOPBACK, .flags = IFF_LOOPBACK,
.features = NETIF_F_SG|NETIF_F_FRAGLIST .features = NETIF_F_SG|NETIF_F_FRAGLIST
|NETIF_F_NO_CSUM|NETIF_F_HIGHDMA, |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA
|NETIF_F_LLTX,
}; };
/* Setup and register the of the LOOPBACK device. */ /* Setup and register the of the LOOPBACK device. */
...@@ -188,6 +211,8 @@ int __init loopback_init(void) ...@@ -188,6 +211,8 @@ int __init loopback_init(void)
loopback_dev.priv = stats; loopback_dev.priv = stats;
loopback_dev.get_stats = &get_stats; loopback_dev.get_stats = &get_stats;
} }
loopback_stats = alloc_percpu(struct net_device_stats);
return register_netdev(&loopback_dev); return register_netdev(&loopback_dev);
}; };
......
...@@ -405,6 +405,7 @@ struct net_device ...@@ -405,6 +405,7 @@ struct net_device
#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ #define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
#define NETIF_F_LLTX 4096 /* LockLess TX */
/* Called after device is detached from network. */ /* Called after device is detached from network. */
void (*uninit)(struct net_device *dev); void (*uninit)(struct net_device *dev);
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/pkt_sched.h> #include <linux/pkt_sched.h>
#include <linux/rcupdate.h>
#include <net/pkt_cls.h> #include <net/pkt_cls.h>
#ifdef CONFIG_X86_TSC #ifdef CONFIG_X86_TSC
...@@ -92,6 +93,7 @@ struct Qdisc ...@@ -92,6 +93,7 @@ struct Qdisc
struct net_device *dev; struct net_device *dev;
struct tc_stats stats; struct tc_stats stats;
struct rcu_head q_rcu;
int (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q); int (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q);
/* This field is deprecated, but it is still used by CBQ /* This field is deprecated, but it is still used by CBQ
......
...@@ -107,6 +107,7 @@ ...@@ -107,6 +107,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <linux/netpoll.h> #include <linux/netpoll.h>
#include <linux/rcupdate.h>
#ifdef CONFIG_NET_RADIO #ifdef CONFIG_NET_RADIO
#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
#include <net/iw_handler.h> #include <net/iw_handler.h>
...@@ -1305,6 +1306,20 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask) ...@@ -1305,6 +1306,20 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask)
return 0; return 0;
} }
/*
 * Take dev->xmit_lock (with bottom halves disabled) and record @cpu as the
 * lock owner, unless the device advertises NETIF_F_LLTX, in which case
 * nothing is done.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else), and the arguments are parenthesized so
 * expressions such as &some_dev expand correctly.
 */
#define HARD_TX_LOCK_BH(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		spin_lock_bh(&(dev)->xmit_lock);		\
		(dev)->xmit_lock_owner = (cpu);			\
	}							\
} while (0)
/*
 * Clear the recorded lock owner (set to -1) and release dev->xmit_lock,
 * re-enabling bottom halves, unless the device advertises NETIF_F_LLTX,
 * in which case nothing is done.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else), and the argument is parenthesized so
 * expressions such as &some_dev expand correctly.
 */
#define HARD_TX_UNLOCK_BH(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		(dev)->xmit_lock_owner = -1;			\
		spin_unlock_bh(&(dev)->xmit_lock);		\
	}							\
} while (0)
/** /**
* dev_queue_xmit - transmit a buffer * dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit * @skb: buffer to transmit
...@@ -1348,18 +1363,35 @@ int dev_queue_xmit(struct sk_buff *skb) ...@@ -1348,18 +1363,35 @@ int dev_queue_xmit(struct sk_buff *skb)
if (skb_checksum_help(&skb, 0)) if (skb_checksum_help(&skb, 0))
goto out_kfree_skb; goto out_kfree_skb;
/* Grab device queue */ rcu_read_lock();
spin_lock_bh(&dev->queue_lock); /* Updates of qdisc are serialized by queue_lock.
* The struct Qdisc which is pointed to by qdisc is now a
* rcu structure - it may be accessed without acquiring
* a lock (but the structure may be stale.) The freeing of the
* qdisc will be deferred until it's known that there are no
* more references to it.
*
* If the qdisc has an enqueue function, we still need to
* hold the queue_lock before calling it, since queue_lock
* also serializes access to the device queue.
*/
q = dev->qdisc; q = dev->qdisc;
smp_read_barrier_depends();
if (q->enqueue) { if (q->enqueue) {
/* Grab device queue */
spin_lock_bh(&dev->queue_lock);
rc = q->enqueue(skb, q); rc = q->enqueue(skb, q);
qdisc_run(dev); qdisc_run(dev);
spin_unlock_bh(&dev->queue_lock); spin_unlock_bh(&dev->queue_lock);
rcu_read_unlock();
rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
goto out; goto out;
} }
rcu_read_unlock();
/* The device has no queue. Common case for software devices: /* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels... loopback, all the sorts of tunnels...
...@@ -1374,17 +1406,12 @@ int dev_queue_xmit(struct sk_buff *skb) ...@@ -1374,17 +1406,12 @@ int dev_queue_xmit(struct sk_buff *skb)
Either shot noqueue qdisc, it is even simpler 8) Either shot noqueue qdisc, it is even simpler 8)
*/ */
if (dev->flags & IFF_UP) { if (dev->flags & IFF_UP) {
preempt_disable();
int cpu = smp_processor_id(); int cpu = smp_processor_id();
if (dev->xmit_lock_owner != cpu) { if (dev->xmit_lock_owner != cpu) {
/*
* The spin_lock effectivly does a preempt lock, but HARD_TX_LOCK_BH(dev, cpu);
* we are about to drop that...
*/
preempt_disable();
spin_unlock(&dev->queue_lock);
spin_lock(&dev->xmit_lock);
dev->xmit_lock_owner = cpu;
preempt_enable(); preempt_enable();
if (!netif_queue_stopped(dev)) { if (!netif_queue_stopped(dev)) {
...@@ -1393,18 +1420,17 @@ int dev_queue_xmit(struct sk_buff *skb) ...@@ -1393,18 +1420,17 @@ int dev_queue_xmit(struct sk_buff *skb)
rc = 0; rc = 0;
if (!dev->hard_start_xmit(skb, dev)) { if (!dev->hard_start_xmit(skb, dev)) {
dev->xmit_lock_owner = -1; HARD_TX_UNLOCK_BH(dev);
spin_unlock_bh(&dev->xmit_lock);
goto out; goto out;
} }
} }
dev->xmit_lock_owner = -1; HARD_TX_UNLOCK_BH(dev);
spin_unlock_bh(&dev->xmit_lock);
if (net_ratelimit()) if (net_ratelimit())
printk(KERN_CRIT "Virtual device %s asks to " printk(KERN_CRIT "Virtual device %s asks to "
"queue packet!\n", dev->name); "queue packet!\n", dev->name);
goto out_enetdown; goto out_enetdown;
} else { } else {
preempt_enable();
/* Recursion is detected! It is possible, /* Recursion is detected! It is possible,
* unfortunately */ * unfortunately */
if (net_ratelimit()) if (net_ratelimit())
...@@ -1412,7 +1438,6 @@ int dev_queue_xmit(struct sk_buff *skb) ...@@ -1412,7 +1438,6 @@ int dev_queue_xmit(struct sk_buff *skb)
"%s, fix it urgently!\n", dev->name); "%s, fix it urgently!\n", dev->name);
} }
} }
spin_unlock_bh(&dev->queue_lock);
out_enetdown: out_enetdown:
rc = -ENETDOWN; rc = -ENETDOWN;
out_kfree_skb: out_kfree_skb:
......
...@@ -450,6 +450,9 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) ...@@ -450,6 +450,9 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
if (!try_module_get(ops->owner)) if (!try_module_get(ops->owner))
goto err_out; goto err_out;
/* enqueue is accessed locklessly - make sure it's visible
* before we set a netdevice's qdisc pointer to sch */
smp_wmb();
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
write_lock(&qdisc_tree_lock); write_lock(&qdisc_tree_lock);
sch->next = dev->qdisc_list; sch->next = dev->qdisc_list;
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/rtnetlink.h> #include <linux/rtnetlink.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/rcupdate.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/pkt_sched.h> #include <net/pkt_sched.h>
...@@ -387,6 +388,9 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops) ...@@ -387,6 +388,9 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
sch->dev = dev; sch->dev = dev;
sch->stats.lock = &dev->queue_lock; sch->stats.lock = &dev->queue_lock;
atomic_set(&sch->refcnt, 1); atomic_set(&sch->refcnt, 1);
/* enqueue is accessed locklessly - make sure it's visible
* before we set a netdevice's qdisc pointer to sch */
smp_wmb();
if (!ops->init || ops->init(sch, NULL) == 0) if (!ops->init || ops->init(sch, NULL) == 0)
return sch; return sch;
...@@ -404,18 +408,36 @@ void qdisc_reset(struct Qdisc *qdisc) ...@@ -404,18 +408,36 @@ void qdisc_reset(struct Qdisc *qdisc)
ops->reset(qdisc); ops->reset(qdisc);
} }
/*
 * RCU callback that performs the actual teardown of a qdisc.
 * qdisc_destroy() hands the qdisc to call_rcu() with this function, so the
 * work below is deferred rather than done inline by qdisc_destroy().
 *
 * @arg: the struct Qdisc being destroyed, passed as an untyped pointer.
 *
 * Order matters: the ops' reset and destroy hooks run before the module
 * reference is dropped, and the qdisc memory is released last.
 */
static void __qdisc_destroy(void *arg)
{
	struct Qdisc *sch = arg;
	struct Qdisc_ops *sch_ops = sch->ops;

#ifdef CONFIG_NET_ESTIMATOR
	qdisc_kill_estimator(&sch->stats);
#endif

	/* Both hooks are optional; call only the ones the ops provide. */
	if (sch_ops->reset)
		sch_ops->reset(sch);
	if (sch_ops->destroy)
		sch_ops->destroy(sch);

	module_put(sch_ops->owner);

	/* Qdiscs flagged TCQ_F_BUILTIN are never kfree()d here. */
	if (sch->flags & TCQ_F_BUILTIN)
		return;

	kfree(sch);
}
/* Under dev->queue_lock and BH! */ /* Under dev->queue_lock and BH! */
void qdisc_destroy(struct Qdisc *qdisc) void qdisc_destroy(struct Qdisc *qdisc)
{ {
struct Qdisc_ops *ops = qdisc->ops; struct net_device *dev = qdisc->dev;
struct net_device *dev;
if (!atomic_dec_and_test(&qdisc->refcnt)) if (!atomic_dec_and_test(&qdisc->refcnt))
return; return;
dev = qdisc->dev;
if (dev) { if (dev) {
struct Qdisc *q, **qp; struct Qdisc *q, **qp;
for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) { for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) {
...@@ -425,16 +447,9 @@ void qdisc_destroy(struct Qdisc *qdisc) ...@@ -425,16 +447,9 @@ void qdisc_destroy(struct Qdisc *qdisc)
} }
} }
} }
#ifdef CONFIG_NET_ESTIMATOR
qdisc_kill_estimator(&qdisc->stats); call_rcu(&qdisc->q_rcu, __qdisc_destroy, qdisc);
#endif
if (ops->reset)
ops->reset(qdisc);
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
if (!(qdisc->flags&TCQ_F_BUILTIN))
kfree(qdisc);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment