Commit c7286343 authored by Stephen Hemminger, committed by David S. Miller

[NET]: Network packet type using RCU.

* packet type converted from linked list to list_macro
* writer lock replaced with spin lock, readers use RCU
* add __dev_remove_pack for callers that can't sleep.
* af_packet changes to handle the sleeping requirements, and the possible
  races they could cause.
parent a269756e
...@@ -456,7 +456,7 @@ struct packet_type ...@@ -456,7 +456,7 @@ struct packet_type
int (*func) (struct sk_buff *, struct net_device *, int (*func) (struct sk_buff *, struct net_device *,
struct packet_type *); struct packet_type *);
void *data; /* Private to the packet type */ void *data; /* Private to the packet type */
struct packet_type *next; struct list_head list;
}; };
...@@ -472,6 +472,7 @@ extern int netdev_boot_setup_check(struct net_device *dev); ...@@ -472,6 +472,7 @@ extern int netdev_boot_setup_check(struct net_device *dev);
extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr); extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr);
extern void dev_add_pack(struct packet_type *pt); extern void dev_add_pack(struct packet_type *pt);
extern void dev_remove_pack(struct packet_type *pt); extern void dev_remove_pack(struct packet_type *pt);
extern void __dev_remove_pack(struct packet_type *pt);
extern int dev_get(const char *name); extern int dev_get(const char *name);
extern struct net_device *dev_get_by_flags(unsigned short flags, extern struct net_device *dev_get_by_flags(unsigned short flags,
unsigned short mask); unsigned short mask);
......
...@@ -90,7 +90,6 @@ ...@@ -90,7 +90,6 @@
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/brlock.h>
#include <net/sock.h> #include <net/sock.h>
#include <linux/rtnetlink.h> #include <linux/rtnetlink.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
...@@ -170,8 +169,9 @@ const char *if_port_text[] = { ...@@ -170,8 +169,9 @@ const char *if_port_text[] = {
* 86DD IPv6 * 86DD IPv6
*/ */
static struct packet_type *ptype_base[16]; /* 16 way hashed list */ static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED;
static struct packet_type *ptype_all; /* Taps */ static struct list_head ptype_base[16]; /* 16 way hashed list */
static struct list_head ptype_all; /* Taps */
#ifdef OFFLINE_SAMPLE #ifdef OFFLINE_SAMPLE
static void sample_queue(unsigned long dummy); static void sample_queue(unsigned long dummy);
...@@ -239,14 +239,17 @@ int netdev_nit; ...@@ -239,14 +239,17 @@ int netdev_nit;
* Add a protocol handler to the networking stack. The passed &packet_type * Add a protocol handler to the networking stack. The passed &packet_type
* is linked into kernel lists and may not be freed until it has been * is linked into kernel lists and may not be freed until it has been
* removed from the kernel lists. * removed from the kernel lists.
*
* This call does not sleep, therefore it cannot
* guarantee that all CPUs that are in the middle of receiving packets
* will see the new packet type (until the next received packet).
*/ */
void dev_add_pack(struct packet_type *pt) void dev_add_pack(struct packet_type *pt)
{ {
int hash; int hash;
br_write_lock_bh(BR_NETPROTO_LOCK); spin_lock_bh(&ptype_lock);
#ifdef CONFIG_NET_FASTROUTE #ifdef CONFIG_NET_FASTROUTE
/* Hack to detect packet socket */ /* Hack to detect packet socket */
if (pt->data && (long)(pt->data) != 1) { if (pt->data && (long)(pt->data) != 1) {
...@@ -256,52 +259,76 @@ void dev_add_pack(struct packet_type *pt) ...@@ -256,52 +259,76 @@ void dev_add_pack(struct packet_type *pt)
#endif #endif
if (pt->type == htons(ETH_P_ALL)) { if (pt->type == htons(ETH_P_ALL)) {
netdev_nit++; netdev_nit++;
pt->next = ptype_all; list_add_rcu(&pt->list, &ptype_all);
ptype_all = pt;
} else { } else {
hash = ntohs(pt->type) & 15; hash = ntohs(pt->type) & 15;
pt->next = ptype_base[hash]; list_add_rcu(&pt->list, &ptype_base[hash]);
ptype_base[hash] = pt;
} }
br_write_unlock_bh(BR_NETPROTO_LOCK); spin_unlock_bh(&ptype_lock);
} }
extern void linkwatch_run_queue(void); extern void linkwatch_run_queue(void);
/** /**
* dev_remove_pack - remove packet handler * __dev_remove_pack - remove packet handler
* @pt: packet type declaration * @pt: packet type declaration
* *
* Remove a protocol handler that was previously added to the kernel * Remove a protocol handler that was previously added to the kernel
* protocol handlers by dev_add_pack(). The passed &packet_type is removed * protocol handlers by dev_add_pack(). The passed &packet_type is removed
* from the kernel lists and can be freed or reused once this function * from the kernel lists and can be freed or reused once this function
* returns. * returns.
*
* The packet type might still be in use by receivers
* and must not be freed until after all the CPUs have gone
* through a quiescent state.
*/ */
void dev_remove_pack(struct packet_type *pt) void __dev_remove_pack(struct packet_type *pt)
{ {
struct packet_type **pt1; struct list_head *head;
struct packet_type *pt1;
br_write_lock_bh(BR_NETPROTO_LOCK); spin_lock_bh(&ptype_lock);
if (pt->type == htons(ETH_P_ALL)) { if (pt->type == htons(ETH_P_ALL)) {
netdev_nit--; netdev_nit--;
pt1 = &ptype_all; head = &ptype_all;
} else } else
pt1 = &ptype_base[ntohs(pt->type) & 15]; head = &ptype_base[ntohs(pt->type) & 15];
for (; *pt1; pt1 = &((*pt1)->next)) { list_for_each_entry(pt1, head, list) {
if (pt == *pt1) { if (pt == pt1) {
*pt1 = pt->next;
#ifdef CONFIG_NET_FASTROUTE #ifdef CONFIG_NET_FASTROUTE
if (pt->data) if (pt->data)
netdev_fastroute_obstacles--; netdev_fastroute_obstacles--;
#endif #endif
list_del_rcu(&pt->list);
goto out; goto out;
} }
} }
printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out: out:
br_write_unlock_bh(BR_NETPROTO_LOCK); spin_unlock_bh(&ptype_lock);
}
/**
* dev_remove_pack - remove packet handler
* @pt: packet type declaration
*
* Remove a protocol handler that was previously added to the kernel
* protocol handlers by dev_add_pack(). The passed &packet_type is removed
* from the kernel lists and can be freed or reused once this function
* returns.
*
* This call sleeps to guarantee that no CPU is looking at the packet
* type after return.
*/
void dev_remove_pack(struct packet_type *pt)
{
__dev_remove_pack(pt);
synchronize_net();
} }
/****************************************************************************** /******************************************************************************
...@@ -943,8 +970,8 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) ...@@ -943,8 +970,8 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
struct packet_type *ptype; struct packet_type *ptype;
do_gettimeofday(&skb->stamp); do_gettimeofday(&skb->stamp);
br_read_lock(BR_NETPROTO_LOCK); rcu_read_lock();
for (ptype = ptype_all; ptype; ptype = ptype->next) { list_for_each_entry_rcu(ptype, &ptype_all, list) {
/* Never send packets back to the socket /* Never send packets back to the socket
* they originated from - MvS (miquels@drinkel.ow.org) * they originated from - MvS (miquels@drinkel.ow.org)
*/ */
...@@ -974,7 +1001,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) ...@@ -974,7 +1001,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
ptype->func(skb2, skb->dev, ptype); ptype->func(skb2, skb->dev, ptype);
} }
} }
br_read_unlock(BR_NETPROTO_LOCK); rcu_read_unlock();
} }
/* Calculate csum in the case, when packet is misrouted. /* Calculate csum in the case, when packet is misrouted.
...@@ -1488,7 +1515,8 @@ int netif_receive_skb(struct sk_buff *skb) ...@@ -1488,7 +1515,8 @@ int netif_receive_skb(struct sk_buff *skb)
skb->h.raw = skb->nh.raw = skb->data; skb->h.raw = skb->nh.raw = skb->data;
pt_prev = NULL; pt_prev = NULL;
for (ptype = ptype_all; ptype; ptype = ptype->next) { rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) { if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev) { if (pt_prev) {
if (!pt_prev->data) { if (!pt_prev->data) {
...@@ -1511,17 +1539,15 @@ int netif_receive_skb(struct sk_buff *skb) ...@@ -1511,17 +1539,15 @@ int netif_receive_skb(struct sk_buff *skb)
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
if (skb->dev->br_port) { if (skb->dev->br_port) {
int ret;
ret = handle_bridge(skb, pt_prev); ret = handle_bridge(skb, pt_prev);
if (br_handle_frame_hook(skb) == 0) if (br_handle_frame_hook(skb) == 0)
return ret; goto out;
pt_prev = NULL; pt_prev = NULL;
} }
#endif #endif
for (ptype = ptype_base[ntohs(type) & 15]; ptype; ptype = ptype->next) { list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type && if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) { (!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev) { if (pt_prev) {
...@@ -1552,6 +1578,8 @@ int netif_receive_skb(struct sk_buff *skb) ...@@ -1552,6 +1578,8 @@ int netif_receive_skb(struct sk_buff *skb)
ret = NET_RX_DROP; ret = NET_RX_DROP;
} }
out:
rcu_read_unlock();
return ret; return ret;
} }
...@@ -1625,7 +1653,8 @@ static void net_rx_action(struct softirq_action *h) ...@@ -1625,7 +1653,8 @@ static void net_rx_action(struct softirq_action *h)
unsigned long start_time = jiffies; unsigned long start_time = jiffies;
int budget = netdev_max_backlog; int budget = netdev_max_backlog;
br_read_lock(BR_NETPROTO_LOCK);
preempt_disable();
local_irq_disable(); local_irq_disable();
while (!list_empty(&queue->poll_list)) { while (!list_empty(&queue->poll_list)) {
...@@ -1654,7 +1683,7 @@ static void net_rx_action(struct softirq_action *h) ...@@ -1654,7 +1683,7 @@ static void net_rx_action(struct softirq_action *h)
} }
out: out:
local_irq_enable(); local_irq_enable();
br_read_unlock(BR_NETPROTO_LOCK); preempt_enable();
return; return;
softnet_break: softnet_break:
...@@ -1997,9 +2026,9 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) ...@@ -1997,9 +2026,9 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
dev_hold(master); dev_hold(master);
} }
br_write_lock_bh(BR_NETPROTO_LOCK);
slave->master = master; slave->master = master;
br_write_unlock_bh(BR_NETPROTO_LOCK);
synchronize_net();
if (old) if (old)
dev_put(old); dev_put(old);
...@@ -2663,8 +2692,8 @@ int netdev_finish_unregister(struct net_device *dev) ...@@ -2663,8 +2692,8 @@ int netdev_finish_unregister(struct net_device *dev)
/* Synchronize with packet receive processing. */ /* Synchronize with packet receive processing. */
void synchronize_net(void) void synchronize_net(void)
{ {
br_write_lock_bh(BR_NETPROTO_LOCK); might_sleep();
br_write_unlock_bh(BR_NETPROTO_LOCK); synchronize_kernel();
} }
/** /**
...@@ -2848,6 +2877,10 @@ static int __init net_dev_init(void) ...@@ -2848,6 +2877,10 @@ static int __init net_dev_init(void)
subsystem_register(&net_subsys); subsystem_register(&net_subsys);
INIT_LIST_HEAD(&ptype_all);
for (i = 0; i < 16; i++)
INIT_LIST_HEAD(&ptype_base[i]);
#ifdef CONFIG_NET_DIVERT #ifdef CONFIG_NET_DIVERT
dv_init(); dv_init();
#endif /* CONFIG_NET_DIVERT */ #endif /* CONFIG_NET_DIVERT */
......
...@@ -570,6 +570,7 @@ EXPORT_SYMBOL(netif_rx); ...@@ -570,6 +570,7 @@ EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(netif_receive_skb); EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(dev_add_pack); EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_remove_pack); EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_get); EXPORT_SYMBOL(dev_get);
EXPORT_SYMBOL(dev_alloc); EXPORT_SYMBOL(dev_alloc);
EXPORT_SYMBOL(dev_alloc_name); EXPORT_SYMBOL(dev_alloc_name);
......
...@@ -774,6 +774,7 @@ static int packet_release(struct socket *sock) ...@@ -774,6 +774,7 @@ static int packet_release(struct socket *sock)
*/ */
dev_remove_pack(&po->prot_hook); dev_remove_pack(&po->prot_hook);
po->running = 0; po->running = 0;
po->num = 0;
__sock_put(sk); __sock_put(sk);
} }
...@@ -819,9 +820,12 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol) ...@@ -819,9 +820,12 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
spin_lock(&po->bind_lock); spin_lock(&po->bind_lock);
if (po->running) { if (po->running) {
dev_remove_pack(&po->prot_hook);
__sock_put(sk); __sock_put(sk);
po->running = 0; po->running = 0;
po->num = 0;
spin_unlock(&po->bind_lock);
dev_remove_pack(&po->prot_hook);
spin_lock(&po->bind_lock);
} }
po->num = protocol; po->num = protocol;
...@@ -1374,7 +1378,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void ...@@ -1374,7 +1378,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
if (dev->ifindex == po->ifindex) { if (dev->ifindex == po->ifindex) {
spin_lock(&po->bind_lock); spin_lock(&po->bind_lock);
if (po->running) { if (po->running) {
dev_remove_pack(&po->prot_hook); __dev_remove_pack(&po->prot_hook);
__sock_put(sk); __sock_put(sk);
po->running = 0; po->running = 0;
sk->err = ENETDOWN; sk->err = ENETDOWN;
...@@ -1618,10 +1622,15 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing ...@@ -1618,10 +1622,15 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
/* Detach socket from network */ /* Detach socket from network */
spin_lock(&po->bind_lock); spin_lock(&po->bind_lock);
if (po->running) if (po->running) {
dev_remove_pack(&po->prot_hook); __dev_remove_pack(&po->prot_hook);
po->num = 0;
po->running = 0;
}
spin_unlock(&po->bind_lock); spin_unlock(&po->bind_lock);
synchronize_net();
err = -EBUSY; err = -EBUSY;
if (closing || atomic_read(&po->mapped) == 0) { if (closing || atomic_read(&po->mapped) == 0) {
err = 0; err = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment