Commit 567f4872 authored by Arthur Kepner, committed by David S. Miller

[NET]: Lockless loopback patch (version 2).

parent e2580c9e
......@@ -56,6 +56,7 @@
#include <linux/ip.h>
#include <linux/tcp.h>
static struct net_device_stats *loopback_stats;
#define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)
......@@ -123,7 +124,6 @@ static void emulate_large_send_offload(struct sk_buff *skb)
*/
static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct net_device_stats *stats = dev->priv;
skb_orphan(skb);
......@@ -142,11 +142,12 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
}
dev->last_rx = jiffies;
if (likely(stats)) {
stats->rx_bytes+=skb->len;
stats->tx_bytes+=skb->len;
stats->rx_packets++;
stats->tx_packets++;
if (likely(loopback_stats)) {
get_cpu_ptr(loopback_stats)->rx_bytes += skb->len;
get_cpu_ptr(loopback_stats)->tx_bytes += skb->len;
get_cpu_ptr(loopback_stats)->rx_packets++;
get_cpu_ptr(loopback_stats)->tx_packets++;
put_cpu_ptr(loopback_stats);
}
netif_rx(skb);
......@@ -156,7 +157,28 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
/*
 * Report the statistics of the loopback device.
 *
 * The transmit path accumulates its counters in the per-CPU loopback_stats
 * area.  This routine folds those counters into the net_device_stats buffer
 * that dev->priv points to and returns that buffer.
 *
 * Returns NULL when dev->priv is not set.  When loopback_stats has not been
 * allocated, the zeroed buffer is returned unchanged.
 */
static struct net_device_stats *get_stats(struct net_device *dev)
{
	struct net_device_stats *stats = dev->priv;
	int i;

	if (!stats)
		return NULL;

	/* The totals are rebuilt from scratch on every call. */
	memset(stats, 0, sizeof(*stats));

	if (!loopback_stats)
		return stats;

	for (i = 0; i < NR_CPUS; i++) {
		struct net_device_stats *lb_stats;

		if (!cpu_possible(i))
			continue;

		/* Look the per-CPU slot up once instead of once per field. */
		lb_stats = per_cpu_ptr(loopback_stats, i);
		stats->rx_bytes   += lb_stats->rx_bytes;
		stats->tx_bytes   += lb_stats->tx_bytes;
		stats->rx_packets += lb_stats->rx_packets;
		stats->tx_packets += lb_stats->tx_packets;
	}
	return stats;
}
struct net_device loopback_dev = {
......@@ -173,7 +195,8 @@ struct net_device loopback_dev = {
.rebuild_header = eth_rebuild_header,
.flags = IFF_LOOPBACK,
.features = NETIF_F_SG|NETIF_F_FRAGLIST
|NETIF_F_NO_CSUM|NETIF_F_HIGHDMA,
|NETIF_F_NO_CSUM|NETIF_F_HIGHDMA
|NETIF_F_LLTX,
};
/* Set up and register the LOOPBACK device. */
......@@ -189,6 +212,8 @@ int __init loopback_init(void)
loopback_dev.get_stats = &get_stats;
}
loopback_stats = alloc_percpu(struct net_device_stats);
return register_netdev(&loopback_dev);
};
......
......@@ -405,6 +405,7 @@ struct net_device
#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
#define NETIF_F_LLTX 4096 /* LockLess TX */
/* Called after device is detached from network. */
void (*uninit)(struct net_device *dev);
......
......@@ -11,6 +11,7 @@
#include <linux/netdevice.h>
#include <linux/types.h>
#include <linux/pkt_sched.h>
#include <linux/rcupdate.h>
#include <net/pkt_cls.h>
#ifdef CONFIG_X86_TSC
......@@ -92,6 +93,7 @@ struct Qdisc
struct net_device *dev;
struct tc_stats stats;
struct rcu_head q_rcu;
int (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q);
/* This field is deprecated, but it is still used by CBQ
......
......@@ -107,6 +107,7 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#ifdef CONFIG_NET_RADIO
#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
#include <net/iw_handler.h>
......@@ -1305,6 +1306,20 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask)
return 0;
}
/*
 * Take dev->xmit_lock with spin_lock_bh() and record @cpu as the lock
 * owner, unless the device advertises NETIF_F_LLTX, in which case nothing
 * is done.
 *
 * Wrapped in do { } while (0) so that "HARD_TX_LOCK_BH(dev, cpu);" is a
 * single statement and can be used in an unbraced if/else.  Arguments are
 * parenthesized so that expressions such as "&some_dev" expand correctly.
 */
#define HARD_TX_LOCK_BH(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		spin_lock_bh(&(dev)->xmit_lock);		\
		(dev)->xmit_lock_owner = (cpu);			\
	}							\
} while (0)

/*
 * Counterpart of HARD_TX_LOCK_BH(): reset the recorded owner to -1 and
 * release dev->xmit_lock with spin_unlock_bh(), unless the device
 * advertises NETIF_F_LLTX, in which case nothing is done.
 */
#define HARD_TX_UNLOCK_BH(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		(dev)->xmit_lock_owner = -1;			\
		spin_unlock_bh(&(dev)->xmit_lock);		\
	}							\
} while (0)
/**
* dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
......@@ -1348,18 +1363,35 @@ int dev_queue_xmit(struct sk_buff *skb)
if (skb_checksum_help(&skb, 0))
goto out_kfree_skb;
/* Grab device queue */
spin_lock_bh(&dev->queue_lock);
rcu_read_lock();
/* Updates of qdisc are serialized by queue_lock.
* The struct Qdisc which is pointed to by qdisc is now a
* rcu structure - it may be accessed without acquiring
* a lock (but the structure may be stale.) The freeing of the
* qdisc will be deferred until it's known that there are no
* more references to it.
*
* If the qdisc has an enqueue function, we still need to
* hold the queue_lock before calling it, since queue_lock
* also serializes access to the device queue.
*/
q = dev->qdisc;
smp_read_barrier_depends();
if (q->enqueue) {
/* Grab device queue */
spin_lock_bh(&dev->queue_lock);
rc = q->enqueue(skb, q);
qdisc_run(dev);
spin_unlock_bh(&dev->queue_lock);
rcu_read_unlock();
rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
goto out;
}
rcu_read_unlock();
/* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels...
......@@ -1374,17 +1406,12 @@ int dev_queue_xmit(struct sk_buff *skb)
Either shot noqueue qdisc, it is even simpler 8)
*/
if (dev->flags & IFF_UP) {
preempt_disable();
int cpu = smp_processor_id();
if (dev->xmit_lock_owner != cpu) {
/*
* The spin_lock effectively does a preempt lock, but
* we are about to drop that...
*/
preempt_disable();
spin_unlock(&dev->queue_lock);
spin_lock(&dev->xmit_lock);
dev->xmit_lock_owner = cpu;
HARD_TX_LOCK_BH(dev, cpu);
preempt_enable();
if (!netif_queue_stopped(dev)) {
......@@ -1393,18 +1420,17 @@ int dev_queue_xmit(struct sk_buff *skb)
rc = 0;
if (!dev->hard_start_xmit(skb, dev)) {
dev->xmit_lock_owner = -1;
spin_unlock_bh(&dev->xmit_lock);
HARD_TX_UNLOCK_BH(dev);
goto out;
}
}
dev->xmit_lock_owner = -1;
spin_unlock_bh(&dev->xmit_lock);
HARD_TX_UNLOCK_BH(dev);
if (net_ratelimit())
printk(KERN_CRIT "Virtual device %s asks to "
"queue packet!\n", dev->name);
goto out_enetdown;
} else {
preempt_enable();
/* Recursion is detected! It is possible,
* unfortunately */
if (net_ratelimit())
......@@ -1412,7 +1438,6 @@ int dev_queue_xmit(struct sk_buff *skb)
"%s, fix it urgently!\n", dev->name);
}
}
spin_unlock_bh(&dev->queue_lock);
out_enetdown:
rc = -ENETDOWN;
out_kfree_skb:
......
......@@ -450,6 +450,9 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
if (!try_module_get(ops->owner))
goto err_out;
/* enqueue is accessed locklessly - make sure it's visible
* before we set a netdevice's qdisc pointer to sch */
smp_wmb();
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
write_lock(&qdisc_tree_lock);
sch->next = dev->qdisc_list;
......
......@@ -30,6 +30,7 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
......@@ -387,6 +388,9 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
sch->dev = dev;
sch->stats.lock = &dev->queue_lock;
atomic_set(&sch->refcnt, 1);
/* enqueue is accessed locklessly - make sure it's visible
* before we set a netdevice's qdisc pointer to sch */
smp_wmb();
if (!ops->init || ops->init(sch, NULL) == 0)
return sch;
......@@ -404,18 +408,36 @@ void qdisc_reset(struct Qdisc *qdisc)
ops->reset(qdisc);
}
/*
 * RCU callback that tears a qdisc down once there are no further
 * references to it.
 *
 * @arg: the struct Qdisc being destroyed, passed as an untyped pointer.
 *
 * Order matters: the estimator is killed first (when configured), then the
 * ops' reset and destroy hooks run if present, then the module reference
 * held on the ops' owner is dropped.  The qdisc memory itself is released
 * last, and only when the qdisc is not flagged TCQ_F_BUILTIN.
 */
static void __qdisc_destroy (void * arg)
{
	struct Qdisc *sch = arg;
	struct Qdisc_ops *sch_ops = sch->ops;

#ifdef CONFIG_NET_ESTIMATOR
	qdisc_kill_estimator(&sch->stats);
#endif

	if (sch_ops->reset)
		sch_ops->reset(sch);

	if (sch_ops->destroy)
		sch_ops->destroy(sch);

	module_put(sch_ops->owner);

	/* Built-in qdiscs are not freed here. */
	if (sch->flags & TCQ_F_BUILTIN)
		return;

	kfree(sch);
}
/* Under dev->queue_lock and BH! */
void qdisc_destroy(struct Qdisc *qdisc)
{
struct Qdisc_ops *ops = qdisc->ops;
struct net_device *dev;
struct net_device *dev = qdisc->dev;
if (!atomic_dec_and_test(&qdisc->refcnt))
return;
dev = qdisc->dev;
if (dev) {
struct Qdisc *q, **qp;
for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) {
......@@ -425,16 +447,9 @@ void qdisc_destroy(struct Qdisc *qdisc)
}
}
}
#ifdef CONFIG_NET_ESTIMATOR
qdisc_kill_estimator(&qdisc->stats);
#endif
if (ops->reset)
ops->reset(qdisc);
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
if (!(qdisc->flags&TCQ_F_BUILTIN))
kfree(qdisc);
call_rcu(&qdisc->q_rcu, __qdisc_destroy, qdisc);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment