Commit 4f78b8d1 authored by David S. Miller's avatar David S. Miller

Merge nuts.ninka.net:/home/davem/src/BK/network-2.5

into nuts.ninka.net:/home/davem/src/BK/net-2.5
parents 0f7923d7 36f29ee0
...@@ -2343,14 +2343,9 @@ static void ppp_shutdown_interface(struct ppp *ppp) ...@@ -2343,14 +2343,9 @@ static void ppp_shutdown_interface(struct ppp *ppp)
dev = ppp->dev; dev = ppp->dev;
ppp->dev = 0; ppp->dev = 0;
ppp_unlock(ppp); ppp_unlock(ppp);
if (dev) {
rtnl_lock();
/* This will call dev_close() for us. */ /* This will call dev_close() for us. */
unregister_netdevice(dev); if (dev)
unregister_netdev(dev);
rtnl_unlock();
}
cardmap_set(&all_ppp_units, ppp->file.index, NULL); cardmap_set(&all_ppp_units, ppp->file.index, NULL);
ppp->file.dead = 1; ppp->file.dead = 1;
ppp->owner = NULL; ppp->owner = NULL;
......
...@@ -83,12 +83,7 @@ ...@@ -83,12 +83,7 @@
#define SLIP_VERSION "0.8.4-NET3.019-NEWTTY" #define SLIP_VERSION "0.8.4-NET3.019-NEWTTY"
static struct net_device **slip_devs;
typedef struct slip_ctrl {
struct slip ctrl; /* SLIP things */
struct net_device dev; /* the device */
} slip_ctrl_t;
static slip_ctrl_t **slip_ctrls;
int slip_maxdev = SL_NRUNIT; /* Can be overridden with insmod! */ int slip_maxdev = SL_NRUNIT; /* Can be overridden with insmod! */
MODULE_PARM(slip_maxdev, "i"); MODULE_PARM(slip_maxdev, "i");
...@@ -624,32 +619,45 @@ static int sl_init(struct net_device *dev) ...@@ -624,32 +619,45 @@ static int sl_init(struct net_device *dev)
*/ */
dev->mtu = sl->mtu; dev->mtu = sl->mtu;
dev->hard_start_xmit = sl_xmit; dev->type = ARPHRD_SLIP + sl->mode;
#ifdef SL_CHECK_TRANSMIT #ifdef SL_CHECK_TRANSMIT
dev->tx_timeout = sl_tx_timeout; dev->tx_timeout = sl_tx_timeout;
dev->watchdog_timeo = 20*HZ; dev->watchdog_timeo = 20*HZ;
#endif #endif
return 0;
}
static void sl_uninit(struct net_device *dev)
{
struct slip *sl = (struct slip*)(dev->priv);
sl_free_bufs(sl);
}
static void sl_setup(struct net_device *dev)
{
dev->init = sl_init;
dev->uninit = sl_uninit;
dev->open = sl_open; dev->open = sl_open;
dev->destructor = (void (*)(struct net_device *))kfree;
dev->stop = sl_close; dev->stop = sl_close;
dev->get_stats = sl_get_stats; dev->get_stats = sl_get_stats;
dev->change_mtu = sl_change_mtu; dev->change_mtu = sl_change_mtu;
dev->hard_start_xmit = sl_xmit;
#ifdef CONFIG_SLIP_SMART #ifdef CONFIG_SLIP_SMART
dev->do_ioctl = sl_ioctl; dev->do_ioctl = sl_ioctl;
#endif #endif
dev->hard_header_len = 0; dev->hard_header_len = 0;
dev->addr_len = 0; dev->addr_len = 0;
dev->type = ARPHRD_SLIP + sl->mode;
dev->tx_queue_len = 10; dev->tx_queue_len = 10;
SET_MODULE_OWNER(dev); SET_MODULE_OWNER(dev);
/* New-style flags. */ /* New-style flags. */
dev->flags = IFF_NOARP|IFF_POINTOPOINT|IFF_MULTICAST; dev->flags = IFF_NOARP|IFF_POINTOPOINT|IFF_MULTICAST;
return 0;
} }
/****************************************** /******************************************
Routines looking at TTY side. Routines looking at TTY side.
******************************************/ ******************************************/
...@@ -702,52 +710,57 @@ static void slip_receive_buf(struct tty_struct *tty, const unsigned char *cp, ch ...@@ -702,52 +710,57 @@ static void slip_receive_buf(struct tty_struct *tty, const unsigned char *cp, ch
static void sl_sync(void) static void sl_sync(void)
{ {
int i; int i;
struct net_device *dev;
struct slip *sl;
for (i = 0; i < slip_maxdev; i++) { for (i = 0; i < slip_maxdev; i++) {
slip_ctrl_t *slp = slip_ctrls[i]; if ((dev = slip_devs[i]) == NULL)
if (slp == NULL)
break; break;
if (slp->ctrl.tty || slp->ctrl.leased)
sl = dev->priv;
if (sl->tty || sl->leased)
continue; continue;
if (slp->dev.flags&IFF_UP) if (dev->flags&IFF_UP)
dev_close(&slp->dev); dev_close(dev);
} }
} }
/* Find a free SLIP channel, and link in this `tty' line. */ /* Find a free SLIP channel, and link in this `tty' line. */
static struct slip * static struct slip *
sl_alloc(dev_t line) sl_alloc(dev_t line)
{ {
struct slip *sl;
slip_ctrl_t *slp = NULL;
int i; int i;
int sel = -1; int sel = -1;
int score = -1; int score = -1;
struct net_device *dev = NULL;
struct slip *sl;
if (slip_ctrls == NULL) if (slip_devs == NULL)
return NULL; /* Master array missing ! */ return NULL; /* Master array missing ! */
for (i = 0; i < slip_maxdev; i++) { for (i = 0; i < slip_maxdev; i++) {
slp = slip_ctrls[i]; dev = slip_devs[i];
if (slp == NULL) if (dev == NULL)
break; break;
if (slp->ctrl.leased) { sl = dev->priv;
if (slp->ctrl.line != line) if (sl->leased) {
if (sl->line != line)
continue; continue;
if (slp->ctrl.tty) if (sl->tty)
return NULL; return NULL;
/* Clear ESCAPE & ERROR flags */ /* Clear ESCAPE & ERROR flags */
slp->ctrl.flags &= (1 << SLF_INUSE); sl->flags &= (1 << SLF_INUSE);
return &slp->ctrl; return sl;
} }
if (slp->ctrl.tty) if (sl->tty)
continue; continue;
if (current->pid == slp->ctrl.pid) { if (current->pid == sl->pid) {
if (slp->ctrl.line == line && score < 3) { if (sl->line == line && score < 3) {
sel = i; sel = i;
score = 3; score = 3;
continue; continue;
...@@ -758,7 +771,7 @@ sl_alloc(dev_t line) ...@@ -758,7 +771,7 @@ sl_alloc(dev_t line)
} }
continue; continue;
} }
if (slp->ctrl.line == line && score < 1) { if (sl->line == line && score < 1) {
sel = i; sel = i;
score = 1; score = 1;
continue; continue;
...@@ -771,10 +784,11 @@ sl_alloc(dev_t line) ...@@ -771,10 +784,11 @@ sl_alloc(dev_t line)
if (sel >= 0) { if (sel >= 0) {
i = sel; i = sel;
slp = slip_ctrls[i]; dev = slip_devs[i];
if (score > 1) { if (score > 1) {
slp->ctrl.flags &= (1 << SLF_INUSE); sl = dev->priv;
return &slp->ctrl; sl->flags &= (1 << SLF_INUSE);
return sl;
} }
} }
...@@ -782,26 +796,32 @@ sl_alloc(dev_t line) ...@@ -782,26 +796,32 @@ sl_alloc(dev_t line)
if (i >= slip_maxdev) if (i >= slip_maxdev)
return NULL; return NULL;
if (slp) { if (dev) {
if (test_bit(SLF_INUSE, &slp->ctrl.flags)) { sl = dev->priv;
unregister_netdevice(&slp->dev); if (test_bit(SLF_INUSE, &sl->flags)) {
sl_free_bufs(&slp->ctrl); unregister_netdevice(dev);
dev = NULL;
slip_devs[i] = NULL;
}
} }
} else if ((slp = (slip_ctrl_t *)kmalloc(sizeof(slip_ctrl_t),GFP_KERNEL)) == NULL)
if (!dev) {
char name[IFNAMSIZ];
sprintf(name, "sl%d", i);
dev = alloc_netdev(sizeof(*sl), name, sl_setup);
if (!dev)
return NULL; return NULL;
dev->base_addr = i;
}
memset(slp, 0, sizeof(slip_ctrl_t)); sl = dev->priv;
sl = &slp->ctrl;
/* Initialize channel control data */ /* Initialize channel control data */
sl->magic = SLIP_MAGIC; sl->magic = SLIP_MAGIC;
sl->dev = &slp->dev; sl->dev = dev;
spin_lock_init(&sl->lock); spin_lock_init(&sl->lock);
sl->mode = SL_MODE_DEFAULT; sl->mode = SL_MODE_DEFAULT;
sprintf(slp->dev.name, "sl%d", i);
slp->dev.base_addr = i;
slp->dev.priv = (void*)sl;
slp->dev.init = sl_init;
#ifdef CONFIG_SLIP_SMART #ifdef CONFIG_SLIP_SMART
init_timer(&sl->keepalive_timer); /* initialize timer_list struct */ init_timer(&sl->keepalive_timer); /* initialize timer_list struct */
sl->keepalive_timer.data=(unsigned long)sl; sl->keepalive_timer.data=(unsigned long)sl;
...@@ -810,8 +830,9 @@ sl_alloc(dev_t line) ...@@ -810,8 +830,9 @@ sl_alloc(dev_t line)
sl->outfill_timer.data=(unsigned long)sl; sl->outfill_timer.data=(unsigned long)sl;
sl->outfill_timer.function=sl_outfill; sl->outfill_timer.function=sl_outfill;
#endif #endif
slip_ctrls[i] = slp; slip_devs[i] = dev;
return &slp->ctrl;
return sl;
} }
/* /*
...@@ -865,12 +886,10 @@ slip_open(struct tty_struct *tty) ...@@ -865,12 +886,10 @@ slip_open(struct tty_struct *tty)
if ((err = sl_alloc_bufs(sl, SL_MTU)) != 0) if ((err = sl_alloc_bufs(sl, SL_MTU)) != 0)
goto err_free_chan; goto err_free_chan;
if (register_netdevice(sl->dev)) {
sl_free_bufs(sl);
goto err_free_chan;
}
set_bit(SLF_INUSE, &sl->flags); set_bit(SLF_INUSE, &sl->flags);
if ((err = register_netdevice(sl->dev)))
goto err_free_bufs;
} }
#ifdef CONFIG_SLIP_SMART #ifdef CONFIG_SLIP_SMART
...@@ -888,6 +907,9 @@ slip_open(struct tty_struct *tty) ...@@ -888,6 +907,9 @@ slip_open(struct tty_struct *tty)
rtnl_unlock(); rtnl_unlock();
return sl->dev->base_addr; return sl->dev->base_addr;
err_free_bufs:
sl_free_bufs(sl);
err_free_chan: err_free_chan:
sl->tty = NULL; sl->tty = NULL;
tty->disc_data = NULL; tty->disc_data = NULL;
...@@ -1335,14 +1357,14 @@ static int __init slip_init(void) ...@@ -1335,14 +1357,14 @@ static int __init slip_init(void)
printk(KERN_INFO "SLIP linefill/keepalive option.\n"); printk(KERN_INFO "SLIP linefill/keepalive option.\n");
#endif #endif
slip_ctrls = kmalloc(sizeof(void*)*slip_maxdev, GFP_KERNEL); slip_devs = kmalloc(sizeof(struct net_device *)*slip_maxdev, GFP_KERNEL);
if (!slip_ctrls) { if (!slip_devs) {
printk(KERN_ERR "SLIP: Can't allocate slip_ctrls[] array! Uaargh! (-> No SLIP available)\n"); printk(KERN_ERR "SLIP: Can't allocate slip devices array! Uaargh! (-> No SLIP available)\n");
return -ENOMEM; return -ENOMEM;
} }
/* Clear the pointer array, we allocate devices when we need them */ /* Clear the pointer array, we allocate devices when we need them */
memset(slip_ctrls, 0, sizeof(void*)*slip_maxdev); /* Pointers */ memset(slip_devs, 0, sizeof(struct net_device *)*slip_maxdev);
/* Fill in our line protocol discipline, and register it */ /* Fill in our line protocol discipline, and register it */
if ((status = tty_register_ldisc(N_SLIP, &sl_ldisc)) != 0) { if ((status = tty_register_ldisc(N_SLIP, &sl_ldisc)) != 0) {
...@@ -1354,51 +1376,59 @@ static int __init slip_init(void) ...@@ -1354,51 +1376,59 @@ static int __init slip_init(void)
static void __exit slip_exit(void) static void __exit slip_exit(void)
{ {
int i; int i;
struct net_device *dev;
if (slip_ctrls != NULL) { struct slip *sl;
unsigned long timeout = jiffies + HZ; unsigned long timeout = jiffies + HZ;
int busy = 0; int busy = 0;
if (slip_devs == NULL)
return;
/* First of all: check for active disciplines and hangup them. /* First of all: check for active disciplines and hangup them.
*/ */
do { do {
if (busy) if (busy) {
yield(); current->state = TASK_INTERRUPTIBLE;
schedule_timeout(HZ / 10);
current->state = TASK_RUNNING;
}
busy = 0; busy = 0;
local_bh_disable();
for (i = 0; i < slip_maxdev; i++) { for (i = 0; i < slip_maxdev; i++) {
struct slip_ctrl *slc = slip_ctrls[i]; dev = slip_devs[i];
if (!slc) if (!dev)
continue; continue;
spin_lock(&slc->ctrl.lock); sl = dev->priv;
if (slc->ctrl.tty) { spin_lock_bh(&sl->lock);
if (sl->tty) {
busy++; busy++;
tty_hangup(slc->ctrl.tty); tty_hangup(sl->tty);
} }
spin_unlock(&slc->ctrl.lock); spin_unlock_bh(&sl->lock);
} }
local_bh_enable();
} while (busy && time_before(jiffies, timeout)); } while (busy && time_before(jiffies, timeout));
for (i = 0; i < slip_maxdev; i++) { for (i = 0; i < slip_maxdev; i++) {
struct slip_ctrl *slc = slip_ctrls[i]; dev = slip_devs[i];
if (slc) { if (!dev)
unregister_netdev(&slc->dev); continue;
if (slc->ctrl.tty) { slip_devs[i] = NULL;
printk(KERN_ERR "%s: tty discipline is still running\n", slc->dev.name);
sl = dev->priv;
if (sl->tty) {
printk(KERN_ERR "%s: tty discipline still running\n",
dev->name);
/* Intentionally leak the control block. */ /* Intentionally leak the control block. */
} else { dev->destructor = NULL;
sl_free_bufs(&slc->ctrl);
kfree(slc);
}
slip_ctrls[i] = NULL;
}
} }
kfree(slip_ctrls); unregister_netdev(dev);
slip_ctrls = NULL;
} }
kfree(slip_devs);
slip_devs = NULL;
if ((i = tty_register_ldisc(N_SLIP, NULL))) if ((i = tty_register_ldisc(N_SLIP, NULL)))
{ {
printk(KERN_ERR "SLIP: can't unregister line discipline (err = %d)\n", i); printk(KERN_ERR "SLIP: can't unregister line discipline (err = %d)\n", i);
......
...@@ -355,8 +355,16 @@ struct net_device ...@@ -355,8 +355,16 @@ struct net_device
spinlock_t queue_lock; spinlock_t queue_lock;
/* Number of references to this device */ /* Number of references to this device */
atomic_t refcnt; atomic_t refcnt;
/* The flag marking that device is unregistered, but held by an user */ /* delayed register/unregister */
int deadbeaf; struct list_head todo_list;
/* register/unregister state machine */
enum { NETREG_UNINITIALIZED=0,
NETREG_REGISTERING, /* called register_netdevice */
NETREG_REGISTERED, /* completed register todo */
NETREG_UNREGISTERING, /* called unregister_netdevice */
NETREG_UNREGISTERED, /* completed unregister todo */
} reg_state;
/* Net device features */ /* Net device features */
int features; int features;
......
...@@ -102,6 +102,8 @@ struct rt_cache_stat ...@@ -102,6 +102,8 @@ struct rt_cache_stat
unsigned int gc_ignored; unsigned int gc_ignored;
unsigned int gc_goal_miss; unsigned int gc_goal_miss;
unsigned int gc_dst_overflow; unsigned int gc_dst_overflow;
unsigned int in_hlist_search;
unsigned int out_hlist_search;
}; };
extern struct rt_cache_stat *rt_cache_stat; extern struct rt_cache_stat *rt_cache_stat;
......
...@@ -160,6 +160,12 @@ static __inline__ int tcp_bhashfn(__u16 lport) ...@@ -160,6 +160,12 @@ static __inline__ int tcp_bhashfn(__u16 lport)
extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
unsigned short snum); unsigned short snum);
#if (BITS_PER_LONG == 64)
#define TCP_ADDRCMP_ALIGN_BYTES 8
#else
#define TCP_ADDRCMP_ALIGN_BYTES 4
#endif
/* This is a TIME_WAIT bucket. It works around the memory consumption /* This is a TIME_WAIT bucket. It works around the memory consumption
* problems of sockets in such a state on heavily loaded servers, but * problems of sockets in such a state on heavily loaded servers, but
* without violating the protocol specification. * without violating the protocol specification.
...@@ -184,7 +190,8 @@ struct tcp_tw_bucket { ...@@ -184,7 +190,8 @@ struct tcp_tw_bucket {
__u16 tw_sport; __u16 tw_sport;
/* Socket demultiplex comparisons on incoming packets. */ /* Socket demultiplex comparisons on incoming packets. */
/* these five are in inet_opt */ /* these five are in inet_opt */
__u32 tw_daddr; __u32 tw_daddr
__attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES)));
__u32 tw_rcv_saddr; __u32 tw_rcv_saddr;
__u16 tw_dport; __u16 tw_dport;
__u16 tw_num; __u16 tw_num;
......
...@@ -782,7 +782,6 @@ extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); ...@@ -782,7 +782,6 @@ extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler);
extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler);
extern int xfrm4_tunnel_check_size(struct sk_buff *skb); extern int xfrm4_tunnel_check_size(struct sk_buff *skb);
extern int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp); extern int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
extern int xfrm6_clear_mutable_options(struct sk_buff *skb, u16 *nh_offset, int dir);
extern int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen); extern int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen);
void xfrm_policy_init(void); void xfrm_policy_init(void);
......
...@@ -874,8 +874,6 @@ static struct tty_operations rfcomm_ops = { ...@@ -874,8 +874,6 @@ static struct tty_operations rfcomm_ops = {
int rfcomm_init_ttys(void) int rfcomm_init_ttys(void)
{ {
int i;
rfcomm_tty_driver = alloc_tty_driver(RFCOMM_TTY_PORTS); rfcomm_tty_driver = alloc_tty_driver(RFCOMM_TTY_PORTS);
if (!rfcomm_tty_driver) if (!rfcomm_tty_driver)
return -1; return -1;
......
...@@ -168,14 +168,6 @@ static void sample_queue(unsigned long dummy); ...@@ -168,14 +168,6 @@ static void sample_queue(unsigned long dummy);
static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0); static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
#endif #endif
#ifdef CONFIG_HOTPLUG
static void net_run_sbin_hotplug(struct net_device *dev, int is_register);
static void net_run_hotplug_todo(void);
#else
#define net_run_sbin_hotplug(dev, is_register) do { } while (0)
#define net_run_hotplug_todo() do { } while (0)
#endif
/* /*
* Our notifier list * Our notifier list
*/ */
...@@ -2537,6 +2529,17 @@ int dev_new_index(void) ...@@ -2537,6 +2529,17 @@ int dev_new_index(void)
static int dev_boot_phase = 1; static int dev_boot_phase = 1;
/* Delayed registration/unregisteration */
static spinlock_t net_todo_list_lock = SPIN_LOCK_UNLOCKED;
static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
static inline void net_set_todo(struct net_device *dev)
{
spin_lock(&net_todo_list_lock);
list_add_tail(&dev->todo_list, &net_todo_list);
spin_unlock(&net_todo_list_lock);
}
/** /**
* register_netdevice - register a network device * register_netdevice - register a network device
* @dev: device to register * @dev: device to register
...@@ -2563,6 +2566,9 @@ int register_netdevice(struct net_device *dev) ...@@ -2563,6 +2566,9 @@ int register_netdevice(struct net_device *dev)
BUG_ON(dev_boot_phase); BUG_ON(dev_boot_phase);
ASSERT_RTNL(); ASSERT_RTNL();
/* When net_device's are persistent, this will be fatal. */
WARN_ON(dev->reg_state != NETREG_UNINITIALIZED);
spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->xmit_lock); spin_lock_init(&dev->xmit_lock);
dev->xmit_lock_owner = -1; dev->xmit_lock_owner = -1;
...@@ -2592,9 +2598,6 @@ int register_netdevice(struct net_device *dev) ...@@ -2592,9 +2598,6 @@ int register_netdevice(struct net_device *dev)
goto out_err; goto out_err;
} }
if ((ret = netdev_register_sysfs(dev)))
goto out_err;
/* Fix illegal SG+CSUM combinations. */ /* Fix illegal SG+CSUM combinations. */
if ((dev->features & NETIF_F_SG) && if ((dev->features & NETIF_F_SG) &&
!(dev->features & (NETIF_F_IP_CSUM | !(dev->features & (NETIF_F_IP_CSUM |
...@@ -2625,13 +2628,14 @@ int register_netdevice(struct net_device *dev) ...@@ -2625,13 +2628,14 @@ int register_netdevice(struct net_device *dev)
write_lock_bh(&dev_base_lock); write_lock_bh(&dev_base_lock);
*dp = dev; *dp = dev;
dev_hold(dev); dev_hold(dev);
dev->deadbeaf = 0; dev->reg_state = NETREG_REGISTERING;
write_unlock_bh(&dev_base_lock); write_unlock_bh(&dev_base_lock);
/* Notify protocols, that a new device appeared. */ /* Notify protocols, that a new device appeared. */
notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
net_run_sbin_hotplug(dev, 1); /* Finish registration after unlock */
net_set_todo(dev);
ret = 0; ret = 0;
out: out:
...@@ -2654,7 +2658,7 @@ static int netdev_finish_unregister(struct net_device *dev) ...@@ -2654,7 +2658,7 @@ static int netdev_finish_unregister(struct net_device *dev)
BUG_TRAP(!dev->ip6_ptr); BUG_TRAP(!dev->ip6_ptr);
BUG_TRAP(!dev->dn_ptr); BUG_TRAP(!dev->dn_ptr);
if (!dev->deadbeaf) { if (dev->reg_state != NETREG_UNREGISTERED) {
printk(KERN_ERR "Freeing alive device %p, %s\n", printk(KERN_ERR "Freeing alive device %p, %s\n",
dev, dev->name); dev, dev->name);
return 0; return 0;
...@@ -2731,41 +2735,60 @@ static void netdev_wait_allrefs(struct net_device *dev) ...@@ -2731,41 +2735,60 @@ static void netdev_wait_allrefs(struct net_device *dev)
* rtnl_unlock(); * rtnl_unlock();
* *
* We are invoked by rtnl_unlock() after it drops the semaphore. * We are invoked by rtnl_unlock() after it drops the semaphore.
* This allows us to deal with two problems: * This allows us to deal with problems:
* 1) We can invoke hotplug without deadlocking with linkwatch via * 1) We can create/delete sysfs objects which invoke hotplug
* keventd. * without deadlocking with linkwatch via keventd.
* 2) Since we run with the RTNL semaphore not held, we can sleep * 2) Since we run with the RTNL semaphore not held, we can sleep
* safely in order to wait for the netdev refcnt to drop to zero. * safely in order to wait for the netdev refcnt to drop to zero.
*/ */
static spinlock_t unregister_todo_lock = SPIN_LOCK_UNLOCKED; static DECLARE_MUTEX(net_todo_run_mutex);
static struct net_device *unregister_todo;
void netdev_run_todo(void) void netdev_run_todo(void)
{ {
struct net_device *dev; struct list_head list = LIST_HEAD_INIT(list);
net_run_hotplug_todo(); /* Safe outside mutex since we only care about entries that
* this cpu put into queue while under RTNL.
*/
if (list_empty(&net_todo_list))
return;
spin_lock(&unregister_todo_lock); /* Need to guard against multiple cpu's getting out of order. */
dev = unregister_todo; down(&net_todo_run_mutex);
unregister_todo = NULL;
spin_unlock(&unregister_todo_lock);
while (dev) { /* Snapshot list, allow later requests */
struct net_device *next = dev->next; spin_lock(&net_todo_list_lock);
list_splice_init(&net_todo_list, &list);
spin_unlock(&net_todo_list_lock);
dev->next = NULL; while (!list_empty(&list)) {
struct net_device *dev
= list_entry(list.next, struct net_device, todo_list);
list_del(&dev->todo_list);
switch(dev->reg_state) {
case NETREG_REGISTERING:
netdev_register_sysfs(dev);
dev->reg_state = NETREG_REGISTERED;
break;
case NETREG_UNREGISTERING:
netdev_unregister_sysfs(dev); netdev_unregister_sysfs(dev);
dev->reg_state = NETREG_UNREGISTERED;
netdev_wait_allrefs(dev); netdev_wait_allrefs(dev);
BUG_ON(atomic_read(&dev->refcnt)); BUG_ON(atomic_read(&dev->refcnt));
netdev_finish_unregister(dev); netdev_finish_unregister(dev);
break;
dev = next; default:
printk(KERN_ERR "network todo '%s' but state %d\n",
dev->name, dev->reg_state);
break;
}
} }
up(&net_todo_run_mutex);
} }
/* Synchronize with packet receive processing. */ /* Synchronize with packet receive processing. */
...@@ -2795,13 +2818,19 @@ int unregister_netdevice(struct net_device *dev) ...@@ -2795,13 +2818,19 @@ int unregister_netdevice(struct net_device *dev)
BUG_ON(dev_boot_phase); BUG_ON(dev_boot_phase);
ASSERT_RTNL(); ASSERT_RTNL();
/* Some devices call without registering for initialization unwind. */
if (dev->reg_state == NETREG_UNINITIALIZED) {
printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
"was registered\n", dev->name, dev);
return -ENODEV;
}
BUG_ON(dev->reg_state != NETREG_REGISTERED);
/* If device is running, close it first. */ /* If device is running, close it first. */
if (dev->flags & IFF_UP) if (dev->flags & IFF_UP)
dev_close(dev); dev_close(dev);
BUG_TRAP(!dev->deadbeaf);
dev->deadbeaf = 1;
/* And unlink it from device chain. */ /* And unlink it from device chain. */
for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
if (d == dev) { if (d == dev) {
...@@ -2812,11 +2841,13 @@ int unregister_netdevice(struct net_device *dev) ...@@ -2812,11 +2841,13 @@ int unregister_netdevice(struct net_device *dev)
} }
} }
if (!d) { if (!d) {
printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " printk(KERN_ERR "unregister net_device: '%s' not found\n",
"was registered\n", dev->name, dev); dev->name);
return -ENODEV; return -ENODEV;
} }
dev->reg_state = NETREG_UNREGISTERING;
synchronize_net(); synchronize_net();
#ifdef CONFIG_NET_FASTROUTE #ifdef CONFIG_NET_FASTROUTE
...@@ -2826,7 +2857,6 @@ int unregister_netdevice(struct net_device *dev) ...@@ -2826,7 +2857,6 @@ int unregister_netdevice(struct net_device *dev)
/* Shutdown queueing discipline. */ /* Shutdown queueing discipline. */
dev_shutdown(dev); dev_shutdown(dev);
net_run_sbin_hotplug(dev, 0);
/* Notify protocols, that we are about to destroy /* Notify protocols, that we are about to destroy
this device. They should clean all the things. this device. They should clean all the things.
...@@ -2846,10 +2876,8 @@ int unregister_netdevice(struct net_device *dev) ...@@ -2846,10 +2876,8 @@ int unregister_netdevice(struct net_device *dev)
free_divert_blk(dev); free_divert_blk(dev);
spin_lock(&unregister_todo_lock); /* Finish processing unregister after unlock */
dev->next = unregister_todo; net_set_todo(dev);
unregister_todo = dev;
spin_unlock(&unregister_todo_lock);
dev_put(dev); dev_put(dev);
return 0; return 0;
...@@ -2955,11 +2983,11 @@ static int __init net_dev_init(void) ...@@ -2955,11 +2983,11 @@ static int __init net_dev_init(void)
* dev_alloc_name can now advance to next suitable * dev_alloc_name can now advance to next suitable
* name that is checked next. * name that is checked next.
*/ */
dev->deadbeaf = 1;
dp = &dev->next; dp = &dev->next;
} else { } else {
dp = &dev->next; dp = &dev->next;
dev->ifindex = dev_new_index(); dev->ifindex = dev_new_index();
dev->reg_state = NETREG_REGISTERED;
if (dev->iflink == -1) if (dev->iflink == -1)
dev->iflink = dev->ifindex; dev->iflink = dev->ifindex;
if (!dev->rebuild_header) if (!dev->rebuild_header)
...@@ -2974,7 +3002,7 @@ static int __init net_dev_init(void) ...@@ -2974,7 +3002,7 @@ static int __init net_dev_init(void)
*/ */
dp = &dev_base; dp = &dev_base;
while ((dev = *dp) != NULL) { while ((dev = *dp) != NULL) {
if (dev->deadbeaf) { if (dev->reg_state != NETREG_REGISTERED) {
write_lock_bh(&dev_base_lock); write_lock_bh(&dev_base_lock);
*dp = dev->next; *dp = dev->next;
write_unlock_bh(&dev_base_lock); write_unlock_bh(&dev_base_lock);
...@@ -3001,96 +3029,3 @@ static int __init net_dev_init(void) ...@@ -3001,96 +3029,3 @@ static int __init net_dev_init(void)
} }
subsys_initcall(net_dev_init); subsys_initcall(net_dev_init);
#ifdef CONFIG_HOTPLUG
struct net_hotplug_todo {
struct list_head list;
char ifname[IFNAMSIZ];
int is_register;
};
static spinlock_t net_hotplug_list_lock = SPIN_LOCK_UNLOCKED;
static DECLARE_MUTEX(net_hotplug_run);
static struct list_head net_hotplug_list = LIST_HEAD_INIT(net_hotplug_list);
static inline void net_run_hotplug_one(struct net_hotplug_todo *ent)
{
char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action_str[32];
int i;
sprintf(ifname, "INTERFACE=%s", ent->ifname);
sprintf(action_str, "ACTION=%s",
(ent->is_register ? "register" : "unregister"));
i = 0;
argv[i++] = hotplug_path;
argv[i++] = "net";
argv[i] = 0;
i = 0;
/* minimal command environment */
envp [i++] = "HOME=/";
envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
envp [i++] = ifname;
envp [i++] = action_str;
envp [i] = 0;
call_usermodehelper(argv [0], argv, envp, 0);
}
/* Run all queued hotplug requests.
* Requests are run in FIFO order.
*/
static void net_run_hotplug_todo(void)
{
struct list_head list = LIST_HEAD_INIT(list);
/* This is racy but okay since any other requests will get
* processed when the other guy does rtnl_unlock.
*/
if (list_empty(&net_hotplug_list))
return;
/* Need to guard against multiple cpu's getting out of order. */
down(&net_hotplug_run);
/* Snapshot list, allow later requests */
spin_lock(&net_hotplug_list_lock);
list_splice_init(&net_hotplug_list, &list);
spin_unlock(&net_hotplug_list_lock);
while (!list_empty(&list)) {
struct net_hotplug_todo *ent;
ent = list_entry(list.next, struct net_hotplug_todo, list);
list_del(&ent->list);
net_run_hotplug_one(ent);
kfree(ent);
}
up(&net_hotplug_run);
}
/* Notify userspace when a netdevice event occurs,
* by running '/sbin/hotplug net' with certain
* environment variables set.
*/
static void net_run_sbin_hotplug(struct net_device *dev, int is_register)
{
struct net_hotplug_todo *ent = kmalloc(sizeof(*ent), GFP_KERNEL);
ASSERT_RTNL();
if (!ent)
return;
INIT_LIST_HEAD(&ent->list);
memcpy(ent->ifname, dev->name, IFNAMSIZ);
ent->is_register = is_register;
spin_lock(&net_hotplug_list_lock);
list_add(&ent->list, &net_hotplug_list);
spin_unlock(&net_hotplug_list_lock);
}
#endif
...@@ -15,6 +15,11 @@ ...@@ -15,6 +15,11 @@
#define to_class_dev(obj) container_of(obj,struct class_device,kobj) #define to_class_dev(obj) container_of(obj,struct class_device,kobj)
#define to_net_dev(class) container_of(class, struct net_device, class_dev) #define to_net_dev(class) container_of(class, struct net_device, class_dev)
static inline int dev_isalive(const struct net_device *dev)
{
return dev->reg_state == NETREG_REGISTERED;
}
/* use same locking rules as GIF* ioctl's */ /* use same locking rules as GIF* ioctl's */
static ssize_t netdev_show(const struct class_device *cd, char *buf, static ssize_t netdev_show(const struct class_device *cd, char *buf,
ssize_t (*format)(const struct net_device *, char *)) ssize_t (*format)(const struct net_device *, char *))
...@@ -23,7 +28,7 @@ static ssize_t netdev_show(const struct class_device *cd, char *buf, ...@@ -23,7 +28,7 @@ static ssize_t netdev_show(const struct class_device *cd, char *buf,
ssize_t ret = -EINVAL; ssize_t ret = -EINVAL;
read_lock(&dev_base_lock); read_lock(&dev_base_lock);
if (!net->deadbeaf) if (dev_isalive(net))
ret = (*format)(net, buf); ret = (*format)(net, buf);
read_unlock(&dev_base_lock); read_unlock(&dev_base_lock);
...@@ -60,7 +65,7 @@ static ssize_t netdev_store(struct class_device *dev, ...@@ -60,7 +65,7 @@ static ssize_t netdev_store(struct class_device *dev,
goto err; goto err;
rtnl_lock(); rtnl_lock();
if (!net->deadbeaf) { if (dev_isalive(net)) {
if ((ret = (*set)(net, new)) == 0) if ((ret = (*set)(net, new)) == 0)
ret = len; ret = len;
} }
...@@ -97,17 +102,17 @@ static ssize_t format_addr(char *buf, const unsigned char *addr, int len) ...@@ -97,17 +102,17 @@ static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
static ssize_t show_address(struct class_device *dev, char *buf) static ssize_t show_address(struct class_device *dev, char *buf)
{ {
struct net_device *net = to_net_dev(dev); struct net_device *net = to_net_dev(dev);
if (net->deadbeaf) if (dev_isalive(net))
return -EINVAL;
return format_addr(buf, net->dev_addr, net->addr_len); return format_addr(buf, net->dev_addr, net->addr_len);
return -EINVAL;
} }
static ssize_t show_broadcast(struct class_device *dev, char *buf) static ssize_t show_broadcast(struct class_device *dev, char *buf)
{ {
struct net_device *net = to_net_dev(dev); struct net_device *net = to_net_dev(dev);
if (net->deadbeaf) if (dev_isalive(net))
return -EINVAL;
return format_addr(buf, net->broadcast, net->addr_len); return format_addr(buf, net->broadcast, net->addr_len);
return -EINVAL;
} }
static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL); static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
...@@ -152,16 +157,12 @@ static int change_tx_queue_len(struct net_device *net, unsigned long new_len) ...@@ -152,16 +157,12 @@ static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, size_t len) static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, size_t len)
{ {
return netdev_store(dev, buf,len, change_tx_queue_len); return netdev_store(dev, buf, len, change_tx_queue_len);
} }
static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
store_tx_queue_len); store_tx_queue_len);
static struct class net_class = {
.name = "net",
};
static struct class_device_attribute *net_class_attributes[] = { static struct class_device_attribute *net_class_attributes[] = {
&class_device_attr_ifindex, &class_device_attr_ifindex,
...@@ -263,7 +264,7 @@ netstat_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) ...@@ -263,7 +264,7 @@ netstat_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
ssize_t ret = -EINVAL; ssize_t ret = -EINVAL;
read_lock(&dev_base_lock); read_lock(&dev_base_lock);
if (!dev->deadbeaf && entry->show && dev->get_stats && if (dev_isalive(dev) && entry->show && dev->get_stats &&
(stats = (*dev->get_stats)(dev))) (stats = (*dev->get_stats)(dev)))
ret = entry->show(stats, buf); ret = entry->show(stats, buf);
read_unlock(&dev_base_lock); read_unlock(&dev_base_lock);
...@@ -279,6 +280,35 @@ static struct kobj_type netstat_ktype = { ...@@ -279,6 +280,35 @@ static struct kobj_type netstat_ktype = {
.default_attrs = default_attrs, .default_attrs = default_attrs,
}; };
#ifdef CONFIG_HOTPLUG
static int netdev_hotplug(struct class_device *cd, char **envp,
int num_envp, char *buf, int size)
{
struct net_device *dev = to_net_dev(cd);
int i = 0;
int n;
/* pass interface in env to hotplug. */
envp[i++] = buf;
n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
buf += n;
size -= n;
if ((size <= 0) || (i >= num_envp))
return -ENOMEM;
envp[i] = 0;
return 0;
}
#endif
/*
 * The sysfs "net" device class.  When hotplug support is compiled in,
 * netdev_hotplug() supplies the INTERFACE=<name> environment entry for
 * userspace hotplug events on network devices.
 */
static struct class net_class = {
	.name = "net",
#ifdef CONFIG_HOTPLUG
	.hotplug = netdev_hotplug,
#endif
};
/* Create sysfs entries for network device. */ /* Create sysfs entries for network device. */
int netdev_register_sysfs(struct net_device *net) int netdev_register_sysfs(struct net_device *net)
{ {
......
...@@ -812,7 +812,7 @@ static int dn_nsp_rx_packet(struct sk_buff *skb) ...@@ -812,7 +812,7 @@ static int dn_nsp_rx_packet(struct sk_buff *skb)
printk(KERN_DEBUG "NSP: 0x%02x 0x%02x 0x%04x 0x%04x %d\n", printk(KERN_DEBUG "NSP: 0x%02x 0x%02x 0x%04x 0x%04x %d\n",
(int)cb->rt_flags, (int)cb->nsp_flags, (int)cb->rt_flags, (int)cb->nsp_flags,
(int)cb->src_port, (int)cb->dst_port, (int)cb->src_port, (int)cb->dst_port,
(int)sock_owned_by_user(sk)); !!sock_owned_by_user(sk));
if (!sock_owned_by_user(sk)) if (!sock_owned_by_user(sk))
ret = dn_nsp_backlog_rcv(sk, skb); ret = dn_nsp_backlog_rcv(sk, skb);
else else
......
...@@ -1134,7 +1134,7 @@ static int __init inet_init(void) ...@@ -1134,7 +1134,7 @@ static int __init inet_init(void)
printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n"); printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
#ifdef CONFIG_IP_MULTICAST #ifdef CONFIG_IP_MULTICAST
if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0) if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n"); printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
#endif #endif
/* Register the socket-side information for inet_create. */ /* Register the socket-side information for inet_create. */
......
...@@ -321,7 +321,7 @@ static int rt_cache_stat_get_info(char *buffer, char **start, off_t offset, int ...@@ -321,7 +321,7 @@ static int rt_cache_stat_get_info(char *buffer, char **start, off_t offset, int
for (i = 0; i < NR_CPUS; i++) { for (i = 0; i < NR_CPUS; i++) {
if (!cpu_possible(i)) if (!cpu_possible(i))
continue; continue;
len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
dst_entries, dst_entries,
per_cpu_ptr(rt_cache_stat, i)->in_hit, per_cpu_ptr(rt_cache_stat, i)->in_hit,
per_cpu_ptr(rt_cache_stat, i)->in_slow_tot, per_cpu_ptr(rt_cache_stat, i)->in_slow_tot,
...@@ -338,7 +338,9 @@ static int rt_cache_stat_get_info(char *buffer, char **start, off_t offset, int ...@@ -338,7 +338,9 @@ static int rt_cache_stat_get_info(char *buffer, char **start, off_t offset, int
per_cpu_ptr(rt_cache_stat, i)->gc_total, per_cpu_ptr(rt_cache_stat, i)->gc_total,
per_cpu_ptr(rt_cache_stat, i)->gc_ignored, per_cpu_ptr(rt_cache_stat, i)->gc_ignored,
per_cpu_ptr(rt_cache_stat, i)->gc_goal_miss, per_cpu_ptr(rt_cache_stat, i)->gc_goal_miss,
per_cpu_ptr(rt_cache_stat, i)->gc_dst_overflow per_cpu_ptr(rt_cache_stat, i)->gc_dst_overflow,
per_cpu_ptr(rt_cache_stat, i)->in_hlist_search,
per_cpu_ptr(rt_cache_stat, i)->out_hlist_search
); );
} }
...@@ -1786,6 +1788,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, ...@@ -1786,6 +1788,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
skb->dst = (struct dst_entry*)rth; skb->dst = (struct dst_entry*)rth;
return 0; return 0;
} }
RT_CACHE_STAT_INC(in_hlist_search);
} }
rcu_read_unlock(); rcu_read_unlock();
...@@ -2153,6 +2156,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) ...@@ -2153,6 +2156,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
*rp = rth; *rp = rth;
return 0; return 0;
} }
RT_CACHE_STAT_INC(out_hlist_search);
} }
rcu_read_unlock(); rcu_read_unlock();
......
...@@ -1363,7 +1363,7 @@ static inline void syn_flood_warning(struct sk_buff *skb) ...@@ -1363,7 +1363,7 @@ static inline void syn_flood_warning(struct sk_buff *skb)
{ {
static unsigned long warntime; static unsigned long warntime;
if (jiffies - warntime > HZ * 60) { if (time_after(jiffies, (warntime + HZ * 60))) {
warntime = jiffies; warntime = jiffies;
printk(KERN_INFO printk(KERN_INFO
"possible SYN flooding on port %d. Sending cookies.\n", "possible SYN flooding on port %d. Sending cookies.\n",
......
...@@ -563,7 +563,7 @@ void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) ...@@ -563,7 +563,7 @@ void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo)
tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK); tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK);
add_timer(&tcp_twcal_timer); add_timer(&tcp_twcal_timer);
} else { } else {
if ((long)(tcp_twcal_timer.expires - jiffies) > (slot<<TCP_TW_RECYCLE_TICK)) if (time_after(tcp_twcal_timer.expires, jiffies + (slot<<TCP_TW_RECYCLE_TICK)))
mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK)); mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK));
slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1); slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1);
} }
...@@ -596,7 +596,7 @@ void tcp_twcal_tick(unsigned long dummy) ...@@ -596,7 +596,7 @@ void tcp_twcal_tick(unsigned long dummy)
j = tcp_twcal_jiffie; j = tcp_twcal_jiffie;
for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) { for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) {
if ((long)(j - now) <= 0) { if (time_before_eq(j, now)) {
struct tcp_tw_bucket *tw; struct tcp_tw_bucket *tw;
while((tw = tcp_twcal_row[slot]) != NULL) { while((tw = tcp_twcal_row[slot]) != NULL) {
......
...@@ -227,7 +227,7 @@ static void tcp_delack_timer(unsigned long data) ...@@ -227,7 +227,7 @@ static void tcp_delack_timer(unsigned long data)
if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER)) if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
goto out; goto out;
if ((long)(tp->ack.timeout - jiffies) > 0) { if (time_after(tp->ack.timeout, jiffies)) {
if (!mod_timer(&tp->delack_timer, tp->ack.timeout)) if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
sock_hold(sk); sock_hold(sk);
goto out; goto out;
...@@ -436,7 +436,7 @@ static void tcp_write_timer(unsigned long data) ...@@ -436,7 +436,7 @@ static void tcp_write_timer(unsigned long data)
if (sk->sk_state == TCP_CLOSE || !tp->pending) if (sk->sk_state == TCP_CLOSE || !tp->pending)
goto out; goto out;
if ((long)(tp->timeout - jiffies) > 0) { if (time_after(tp->timeout, jiffies)) {
if (!mod_timer(&tp->retransmit_timer, tp->timeout)) if (!mod_timer(&tp->retransmit_timer, tp->timeout))
sock_hold(sk); sock_hold(sk);
goto out; goto out;
...@@ -516,7 +516,7 @@ static void tcp_synack_timer(struct sock *sk) ...@@ -516,7 +516,7 @@ static void tcp_synack_timer(struct sock *sk)
do { do {
reqp=&lopt->syn_table[i]; reqp=&lopt->syn_table[i];
while ((req = *reqp) != NULL) { while ((req = *reqp) != NULL) {
if ((long)(now - req->expires) >= 0) { if (time_after_eq(now, req->expires)) {
if ((req->retrans < thresh || if ((req->retrans < thresh ||
(req->acked && req->retrans < max_retries)) (req->acked && req->retrans < max_retries))
&& !req->class->rtx_syn_ack(sk, req, NULL)) { && !req->class->rtx_syn_ack(sk, req, NULL)) {
......
...@@ -36,6 +36,114 @@ ...@@ -36,6 +36,114 @@
#include <net/xfrm.h> #include <net/xfrm.h>
#include <asm/scatterlist.h> #include <asm/scatterlist.h>
/*
 * Zero every mutable TLV option inside an IPv6 extension header so the
 * AH ICV is computed over a predictable byte stream.
 *
 * Walks the option list that follows the two-byte (nexthdr, hdrlen)
 * prefix.  A Pad1 option occupies a single byte; every other option
 * carries a length byte, and options whose type has the mutable bit
 * (0x20) set get their data bytes cleared in place.
 *
 * Returns 1 when the whole header parses cleanly, 0 when an option is
 * truncated or overruns the header.
 */
static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
{
	u8 *bytes = (u8 *)opthdr;
	int remaining = ipv6_optlen(opthdr) - 2;	/* skip nexthdr/hdrlen */
	int pos = 2;

	while (remaining > 0) {
		int step;

		if (bytes[pos] == IPV6_TLV_PAD0) {
			/* Pad1: a lone padding byte with no length field. */
			step = 1;
		} else {
			if (remaining < 2)
				return 0;	/* truncated TLV header */
			step = bytes[pos + 1] + 2;
			if (remaining < step)
				return 0;	/* option data overruns header */
			if (bytes[pos] & 0x20)	/* mutable-in-transit bit */
				memset(&bytes[pos + 2], 0, bytes[pos + 1]);
		}
		pos += step;
		remaining -= step;
	}

	/* Every advance is bounds-checked above, so the loop can only
	 * exit with remaining == 0; report success in that case. */
	return remaining == 0;
}
/*
 * Prepare an IPv6 packet for AH ICV calculation by clearing the fields
 * that may legitimately change in transit.
 *
 * Walks the extension-header chain starting right after the fixed IPv6
 * header: hop-by-hop and destination option headers have their mutable
 * TLVs zeroed (via zero_out_mutable_opts()), a routing header has its
 * segments_left cleared, and on output (dir == XFRM_POLICY_OUT) the
 * ICV field of an AH header is zeroed as well.
 *
 * *nh_offset is left at the offset (from skb->nh.raw) of the nexthdr
 * byte preceding the first non-extension header, so the caller can
 * rewrite it later.
 *
 * Returns the protocol number of the first header past the processed
 * chain, or 0 when a malformed option block is encountered.
 */
static int ipv6_clear_mutable_options(struct sk_buff *skb, u16 *nh_offset, int dir)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
	unsigned int packet_len = skb->tail - skb->nh.raw;
	u8 nexthdr = skb->nh.ipv6h->nexthdr;
	u8 nextnexthdr = 0;

	/* Default: the nexthdr byte lives in the fixed IPv6 header. */
	*nh_offset = ((unsigned char *)&skb->nh.ipv6h->nexthdr) - skb->nh.raw;

	while (offset + 1 <= packet_len) {
		switch (nexthdr) {
		case NEXTHDR_HOP:
			*nh_offset = offset;
			offset += ipv6_optlen(exthdr);
			if (!zero_out_mutable_opts(exthdr)) {
				if (net_ratelimit())
					printk(KERN_WARNING "overrun hopopts\n");
				return 0;
			}
			nexthdr = exthdr->nexthdr;
			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
			break;
		case NEXTHDR_ROUTING:
			*nh_offset = offset;
			offset += ipv6_optlen(exthdr);
			/* segments_left is decremented by routers en route. */
			((struct ipv6_rt_hdr*)exthdr)->segments_left = 0;
			nexthdr = exthdr->nexthdr;
			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
			break;
		case NEXTHDR_DEST:
			*nh_offset = offset;
			offset += ipv6_optlen(exthdr);
			if (!zero_out_mutable_opts(exthdr)) {
				if (net_ratelimit())
					printk(KERN_WARNING "overrun destopt\n");
				return 0;
			}
			nexthdr = exthdr->nexthdr;
			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
			break;
		case NEXTHDR_AUTH:
			if (dir == XFRM_POLICY_OUT) {
				/* The ICV must be zero while it is being
				 * (re)computed over the packet. */
				memset(((struct ipv6_auth_hdr*)exthdr)->auth_data, 0,
				       (((struct ipv6_auth_hdr*)exthdr)->hdrlen - 1) << 2);
			}
			if (exthdr->nexthdr == NEXTHDR_DEST) {
				/* A destination-options header may follow AH;
				 * clear its mutable options too. */
				offset += (((struct ipv6_auth_hdr*)exthdr)->hdrlen + 2) << 2;
				exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
				nextnexthdr = exthdr->nexthdr;
				if (!zero_out_mutable_opts(exthdr)) {
					if (net_ratelimit())
						printk(KERN_WARNING "overrun destopt\n");
					return 0;
				}
			}
			return nexthdr;
		default :
			return nexthdr;
		}
	}

	return nexthdr;
}
int ah6_output(struct sk_buff *skb) int ah6_output(struct sk_buff *skb)
{ {
int err; int err;
...@@ -80,7 +188,7 @@ int ah6_output(struct sk_buff *skb) ...@@ -80,7 +188,7 @@ int ah6_output(struct sk_buff *skb)
memcpy(iph, skb->data, hdr_len); memcpy(iph, skb->data, hdr_len);
skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, x->props.header_len); skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, x->props.header_len);
memcpy(skb->nh.ipv6h, iph, hdr_len); memcpy(skb->nh.ipv6h, iph, hdr_len);
nexthdr = xfrm6_clear_mutable_options(skb, &nh_offset, XFRM_POLICY_OUT); nexthdr = ipv6_clear_mutable_options(skb, &nh_offset, XFRM_POLICY_OUT);
if (nexthdr == 0) if (nexthdr == 0)
goto error; goto error;
...@@ -138,20 +246,46 @@ int ah6_output(struct sk_buff *skb) ...@@ -138,20 +246,46 @@ int ah6_output(struct sk_buff *skb)
int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
{ {
int ah_hlen; /*
struct ipv6hdr *iph; * Before process AH
* [IPv6][Ext1][Ext2][AH][Dest][Payload]
* |<-------------->| hdr_len
* |<------------------------>| cleared_hlen
*
* To erase AH:
* Keeping copy of cleared headers. After AH processing,
* Moving the pointer of skb->nh.raw by using skb_pull as long as AH
* header length. Then copy back the copy as long as hdr_len
* If destination header following AH exists, copy it into after [Ext2].
*
* |<>|[IPv6][Ext1][Ext2][Dest][Payload]
* There is offset of AH before IPv6 header after the process.
*/
struct ipv6hdr *iph = skb->nh.ipv6h;
struct ipv6_auth_hdr *ah; struct ipv6_auth_hdr *ah;
struct ah_data *ahp; struct ah_data *ahp;
unsigned char *tmp_hdr = NULL; unsigned char *tmp_hdr = NULL;
int hdr_len = skb->h.raw - skb->nh.raw; u16 hdr_len = skb->data - skb->nh.raw;
u16 ah_hlen;
u16 cleared_hlen = hdr_len;
u16 nh_offset = 0;
u8 nexthdr = 0; u8 nexthdr = 0;
u8 *prevhdr;
if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr))) if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
goto out; goto out;
ah = (struct ipv6_auth_hdr*)skb->data; ah = (struct ipv6_auth_hdr*)skb->data;
ahp = x->data; ahp = x->data;
nexthdr = ah->nexthdr;
ah_hlen = (ah->hdrlen + 2) << 2; ah_hlen = (ah->hdrlen + 2) << 2;
cleared_hlen += ah_hlen;
if (nexthdr == NEXTHDR_DEST) {
struct ipv6_opt_hdr *dsthdr = (struct ipv6_opt_hdr*)(skb->data + ah_hlen);
cleared_hlen += ipv6_optlen(dsthdr);
}
if (ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_full_len) && if (ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_full_len) &&
ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len)) ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len))
...@@ -166,12 +300,16 @@ int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_bu ...@@ -166,12 +300,16 @@ int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_bu
pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
goto out; goto out;
tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC); tmp_hdr = kmalloc(cleared_hlen, GFP_ATOMIC);
if (!tmp_hdr) if (!tmp_hdr)
goto out; goto out;
memcpy(tmp_hdr, skb->nh.raw, hdr_len); memcpy(tmp_hdr, skb->nh.raw, cleared_hlen);
ah = (struct ipv6_auth_hdr*)skb->data; ipv6_clear_mutable_options(skb, &nh_offset, XFRM_POLICY_IN);
iph = skb->nh.ipv6h; iph->priority = 0;
iph->flow_lbl[0] = 0;
iph->flow_lbl[1] = 0;
iph->flow_lbl[2] = 0;
iph->hop_limit = 0;
{ {
u8 auth_data[ahp->icv_trunc_len]; u8 auth_data[ahp->icv_trunc_len];
...@@ -187,9 +325,15 @@ int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_bu ...@@ -187,9 +325,15 @@ int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_bu
} }
} }
nexthdr = ((struct ipv6hdr*)tmp_hdr)->nexthdr = ah->nexthdr; skb->nh.raw = skb_pull(skb, ah_hlen);
skb->nh.raw = skb_pull(skb, (ah->hdrlen+2)<<2);
memcpy(skb->nh.raw, tmp_hdr, hdr_len); memcpy(skb->nh.raw, tmp_hdr, hdr_len);
if (nexthdr == NEXTHDR_DEST) {
memcpy(skb->nh.raw + hdr_len,
tmp_hdr + hdr_len + ah_hlen,
cleared_hlen - hdr_len - ah_hlen);
}
prevhdr = (u8*)(skb->nh.raw + nh_offset);
*prevhdr = nexthdr;
skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_pull(skb, hdr_len); skb_pull(skb, hdr_len);
skb->h.raw = skb->data; skb->h.raw = skb->data;
......
...@@ -37,7 +37,6 @@ EXPORT_SYMBOL(in6addr_loopback); ...@@ -37,7 +37,6 @@ EXPORT_SYMBOL(in6addr_loopback);
EXPORT_SYMBOL(in6_dev_finish_destroy); EXPORT_SYMBOL(in6_dev_finish_destroy);
EXPORT_SYMBOL(ip6_find_1stfragopt); EXPORT_SYMBOL(ip6_find_1stfragopt);
EXPORT_SYMBOL(xfrm6_rcv); EXPORT_SYMBOL(xfrm6_rcv);
EXPORT_SYMBOL(xfrm6_clear_mutable_options);
EXPORT_SYMBOL(rt6_lookup); EXPORT_SYMBOL(rt6_lookup);
EXPORT_SYMBOL(fl6_sock_lookup); EXPORT_SYMBOL(fl6_sock_lookup);
EXPORT_SYMBOL(ipv6_ext_hdr); EXPORT_SYMBOL(ipv6_ext_hdr);
......
...@@ -15,114 +15,6 @@ ...@@ -15,114 +15,6 @@
static kmem_cache_t *secpath_cachep; static kmem_cache_t *secpath_cachep;
/*
 * Zero every mutable TLV option inside an IPv6 extension header so the
 * AH ICV covers a predictable byte stream.
 *
 * Walks the option list after the two-byte (nexthdr, hdrlen) prefix:
 * Pad1 options are one byte; all other options carry a length byte,
 * and those with the mutable bit (0x20) set in the type get their data
 * cleared in place.
 *
 * Returns 1 on a clean parse, 0 when an option is truncated or
 * overruns the header.
 */
static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
{
	u8 *opt = (u8 *)opthdr;
	int len = ipv6_optlen(opthdr);
	int off = 0;
	int optlen = 0;

	/* Skip the fixed nexthdr/hdrlen bytes. */
	off += 2;
	len -= 2;

	while (len > 0) {
		switch (opt[off]) {
		case IPV6_TLV_PAD0:
			/* Pad1: single padding byte, no length field. */
			optlen = 1;
			break;
		default:
			if (len < 2)
				goto bad;	/* truncated TLV header */
			optlen = opt[off+1]+2;
			if (len < optlen)
				goto bad;	/* option overruns header */
			if (opt[off] & 0x20)	/* mutable-in-transit bit */
				memset(&opt[off+2], 0, opt[off+1]);
			break;
		}
		off += optlen;
		len -= optlen;
	}
	if (len == 0)
		return 1;

bad:
	return 0;
}
/*
 * Clear the transit-mutable fields of an IPv6 packet ahead of AH ICV
 * calculation.
 *
 * Walks the extension-header chain after the fixed IPv6 header:
 * hop-by-hop and destination option headers have their mutable TLVs
 * zeroed (zero_out_mutable_opts()), a routing header has segments_left
 * cleared, and on output (dir == XFRM_POLICY_OUT) the AH ICV field is
 * zeroed as well.
 *
 * *nh_offset is left at the offset (from skb->nh.raw) of the nexthdr
 * byte preceding the first non-extension header.
 *
 * Returns the protocol number of the first header past the processed
 * chain, or 0 on a malformed option block.
 */
int xfrm6_clear_mutable_options(struct sk_buff *skb, u16 *nh_offset, int dir)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
	unsigned int packet_len = skb->tail - skb->nh.raw;
	u8 nexthdr = skb->nh.ipv6h->nexthdr;
	u8 nextnexthdr = 0;

	/* Default: the nexthdr byte lives in the fixed IPv6 header. */
	*nh_offset = ((unsigned char *)&skb->nh.ipv6h->nexthdr) - skb->nh.raw;

	while (offset + 1 <= packet_len) {
		switch (nexthdr) {
		case NEXTHDR_HOP:
			*nh_offset = offset;
			offset += ipv6_optlen(exthdr);
			if (!zero_out_mutable_opts(exthdr)) {
				if (net_ratelimit())
					printk(KERN_WARNING "overrun hopopts\n");
				return 0;
			}
			nexthdr = exthdr->nexthdr;
			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
			break;
		case NEXTHDR_ROUTING:
			*nh_offset = offset;
			offset += ipv6_optlen(exthdr);
			/* segments_left is decremented by routers en route. */
			((struct ipv6_rt_hdr*)exthdr)->segments_left = 0;
			nexthdr = exthdr->nexthdr;
			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
			break;
		case NEXTHDR_DEST:
			*nh_offset = offset;
			offset += ipv6_optlen(exthdr);
			if (!zero_out_mutable_opts(exthdr)) {
				if (net_ratelimit())
					printk(KERN_WARNING "overrun destopt\n");
				return 0;
			}
			nexthdr = exthdr->nexthdr;
			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
			break;
		case NEXTHDR_AUTH:
			if (dir == XFRM_POLICY_OUT) {
				/* The ICV must be zero while it is being
				 * (re)computed over the packet. */
				memset(((struct ipv6_auth_hdr*)exthdr)->auth_data, 0,
				       (((struct ipv6_auth_hdr*)exthdr)->hdrlen - 1) << 2);
			}
			if (exthdr->nexthdr == NEXTHDR_DEST) {
				/* A destination-options header may follow AH;
				 * clear its mutable options too. */
				offset += (((struct ipv6_auth_hdr*)exthdr)->hdrlen + 2) << 2;
				exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
				nextnexthdr = exthdr->nexthdr;
				if (!zero_out_mutable_opts(exthdr)) {
					if (net_ratelimit())
						printk(KERN_WARNING "overrun destopt\n");
					return 0;
				}
			}
			return nexthdr;
		default :
			return nexthdr;
		}
	}

	return nexthdr;
}
int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{ {
struct sk_buff *skb = *pskb; struct sk_buff *skb = *pskb;
...@@ -132,26 +24,12 @@ int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) ...@@ -132,26 +24,12 @@ int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
struct xfrm_state *x; struct xfrm_state *x;
int xfrm_nr = 0; int xfrm_nr = 0;
int decaps = 0; int decaps = 0;
struct ipv6hdr *hdr = skb->nh.ipv6h;
unsigned char *tmp_hdr = NULL;
int hdr_len = 0;
u16 nh_offset = 0;
int nexthdr = 0; int nexthdr = 0;
u8 *prevhdr = NULL;
nh_offset = ((unsigned char*)&skb->nh.ipv6h->nexthdr) - skb->nh.raw; ip6_find_1stfragopt(skb, &prevhdr);
hdr_len = sizeof(struct ipv6hdr); nexthdr = *prevhdr;
*nhoffp = prevhdr - skb->nh.raw;
tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
if (!tmp_hdr)
goto drop;
memcpy(tmp_hdr, skb->nh.raw, hdr_len);
nexthdr = xfrm6_clear_mutable_options(skb, &nh_offset, XFRM_POLICY_IN);
hdr->priority = 0;
hdr->flow_lbl[0] = 0;
hdr->flow_lbl[1] = 0;
hdr->flow_lbl[2] = 0;
hdr->hop_limit = 0;
if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
goto drop; goto drop;
...@@ -204,12 +82,6 @@ int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) ...@@ -204,12 +82,6 @@ int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
goto drop; goto drop;
} while (!err); } while (!err);
if (!decaps) {
memcpy(skb->nh.raw, tmp_hdr, hdr_len);
skb->nh.raw[nh_offset] = nexthdr;
skb->nh.ipv6h->payload_len = htons(hdr_len + skb->len - sizeof(struct ipv6hdr));
}
/* Allocate new secpath or COW existing one. */ /* Allocate new secpath or COW existing one. */
if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
kmem_cache_t *pool = skb->sp ? skb->sp->pool : secpath_cachep; kmem_cache_t *pool = skb->sp ? skb->sp->pool : secpath_cachep;
...@@ -243,7 +115,6 @@ int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) ...@@ -243,7 +115,6 @@ int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
netif_rx(skb); netif_rx(skb);
return -1; return -1;
} else { } else {
*nhoffp = nh_offset;
return 1; return 1;
} }
...@@ -251,7 +122,6 @@ int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) ...@@ -251,7 +122,6 @@ int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
spin_unlock(&x->lock); spin_unlock(&x->lock);
xfrm_state_put(x); xfrm_state_put(x);
drop: drop:
if (tmp_hdr) kfree(tmp_hdr);
while (--xfrm_nr >= 0) while (--xfrm_nr >= 0)
xfrm_state_put(xfrm_vec[xfrm_nr].xvec); xfrm_state_put(xfrm_vec[xfrm_nr].xvec);
kfree_skb(skb); kfree_skb(skb);
......
...@@ -513,7 +513,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) ...@@ -513,7 +513,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
maxspi = ntohl(maxspi); maxspi = ntohl(maxspi);
for (h=0; h<maxspi-minspi+1; h++) { for (h=0; h<maxspi-minspi+1; h++) {
spi = minspi + net_random()%(maxspi-minspi+1); spi = minspi + net_random()%(maxspi-minspi+1);
x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
if (x0 == NULL) if (x0 == NULL)
break; break;
xfrm_state_put(x0); xfrm_state_put(x0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment