Commit f5bda5bc authored by David S. Miller

Merge nuts.ninka.net:/home/davem/src/BK/network-2.5

into nuts.ninka.net:/home/davem/src/BK/net-2.5
parents d62a3dd4 cb52a86c
...@@ -6,14 +6,16 @@ menu "Cryptographic options" ...@@ -6,14 +6,16 @@ menu "Cryptographic options"
config CRYPTO config CRYPTO
bool "Cryptographic API" bool "Cryptographic API"
default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m || INET6_AH=y || INET6_AH=m || \
INET6_ESP=y || INET6_ESP=m
help help
This option provides the core Cryptographic API. This option provides the core Cryptographic API.
config CRYPTO_HMAC config CRYPTO_HMAC
bool "HMAC support" bool "HMAC support"
depends on CRYPTO depends on CRYPTO
default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m || INET6_AH=y || INET6_AH=m || \
INET6_ESP=y || INET6_ESP=m
help help
HMAC: Keyed-Hashing for Message Authentication (RFC2104). HMAC: Keyed-Hashing for Message Authentication (RFC2104).
This is required for IPSec. This is required for IPSec.
...@@ -33,14 +35,16 @@ config CRYPTO_MD4 ...@@ -33,14 +35,16 @@ config CRYPTO_MD4
config CRYPTO_MD5 config CRYPTO_MD5
tristate "MD5 digest algorithm" tristate "MD5 digest algorithm"
depends on CRYPTO depends on CRYPTO
default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m || INET6_AH=y || INET6_AH=m || \
INET6_ESP=y || INET6_ESP=m
help help
MD5 message digest algorithm (RFC1321). MD5 message digest algorithm (RFC1321).
config CRYPTO_SHA1 config CRYPTO_SHA1
tristate "SHA1 digest algorithm" tristate "SHA1 digest algorithm"
depends on CRYPTO depends on CRYPTO
default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m || INET6_AH=y || INET6_AH=m || \
INET6_ESP=y || INET6_ESP=m
help help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
...@@ -68,7 +72,7 @@ config CRYPTO_SHA512 ...@@ -68,7 +72,7 @@ config CRYPTO_SHA512
config CRYPTO_DES config CRYPTO_DES
tristate "DES and Triple DES EDE cipher algorithms" tristate "DES and Triple DES EDE cipher algorithms"
depends on CRYPTO depends on CRYPTO
default y if INET_AH=y || INET_AH=m || INET_ESP=y || INET_ESP=m default y if INET_ESP=y || INET_ESP=m || INET6_ESP=y || INET6_ESP=m
help help
DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3). DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
......
...@@ -9,7 +9,6 @@ ...@@ -9,7 +9,6 @@
#include <linux/config.h> #include <linux/config.h>
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/brlock.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/cache.h> #include <linux/cache.h>
......
...@@ -8,7 +8,6 @@ ...@@ -8,7 +8,6 @@
#include <linux/config.h> #include <linux/config.h>
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/brlock.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/cache.h> #include <linux/cache.h>
......
#ifndef __LINUX_BRLOCK_H
#define __LINUX_BRLOCK_H
/*
* 'Big Reader' read-write spinlocks.
*
* super-fast read/write locks, with write-side penalty. The point
* is to have a per-CPU read/write lock. Readers lock their CPU-local
* readlock, writers must lock all locks to get write access. These
* CPU-read-write locks are semantically identical to normal rwlocks.
* Memory usage is higher as well. (NR_CPUS*L1_CACHE_BYTES bytes)
*
* The most important feature is that these spinlocks do not cause
* cacheline ping-pong in the 'most readonly data' case.
*
* Copyright 2000, Ingo Molnar <mingo@redhat.com>
*
* Registry idea and naming [ crutial! :-) ] by:
*
* David S. Miller <davem@redhat.com>
*
* David has an implementation that doesn't use atomic operations in
* the read branch via memory ordering tricks - i guess we need to
* split this up into a per-arch thing? The atomicity issue is a
* secondary item in profiles, at least on x86 platforms.
*
* The atomic op version overhead is indeed a big deal on
* load-locked/store-conditional cpus (ALPHA/MIPS/PPC) and
* compare-and-swap cpus (Sparc64). So we control which
* implementation to use with a __BRLOCK_USE_ATOMICS define. -DaveM
*
*/
/*
 * Register bigreader lock indices here.
 *
 * Each enumerator names one big-reader lock and is used as the second
 * index into __brlock_array.  __BR_END must stay last: it is the bound
 * the br_*_lock helpers compare their index against and the count that
 * __BR_IDX_MAX is derived from.
 */
enum brlock_indices {
BR_NETPROTO_LOCK, /* the only lock registered in this header */
__BR_END /* sentinel: number of registered locks, not itself a lock */
};
#include <linux/config.h>
#ifdef CONFIG_SMP
#include <linux/cache.h>
#include <linux/spinlock.h>
/*
 * Select the reader implementation.  The architectures listed here take a
 * real per-CPU rwlock on the read side; every other architecture uses a
 * plain per-CPU counter plus one writer spinlock per lock index.
 */
#if defined(__i386__) || defined(__ia64__) || defined(__x86_64__)
#define __BRLOCK_USE_ATOMICS
#else
#undef __BRLOCK_USE_ATOMICS
#endif
/* Per-CPU reader slot: an rwlock with atomics, a bare counter without. */
#ifdef __BRLOCK_USE_ATOMICS
typedef rwlock_t brlock_read_lock_t;
#else
typedef unsigned int brlock_read_lock_t;
#endif
/*
 * align last allocated index to the next cacheline:
 *
 * The byte size of __BR_END slots is rounded up to a multiple of
 * SMP_CACHE_BYTES and converted back into a slot count, so each CPU's
 * row of __brlock_array occupies a whole number of cache lines
 * (assuming SMP_CACHE_BYTES is a multiple of the slot size).
 */
#define __BR_IDX_MAX \
(((sizeof(brlock_read_lock_t)*__BR_END + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) / sizeof(brlock_read_lock_t))
/* Reader slots, indexed [cpu][lock index]. */
extern brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX];
#ifndef __BRLOCK_USE_ATOMICS
/*
 * Writer-side spinlock for the counter-based variant, one per lock index,
 * aligned to SMP_CACHE_BYTES.
 */
struct br_wrlock {
spinlock_t lock;
} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
extern struct br_wrlock __br_write_locks[__BR_IDX_MAX];
#endif
/*
 * Called only when a lock helper is handed an index >= __BR_END.  Per the
 * comments at its call sites, a surviving reference to this symbol is what
 * produces the link-time error.
 */
extern void __br_lock_usage_bug (void);
#ifdef __BRLOCK_USE_ATOMICS
/*
 * Take big-reader lock @idx for reading (rwlock-based variant).
 *
 * Preemption is disabled first so that smp_processor_id() stays valid
 * while the current CPU's rwlock slot is selected and read-locked.
 */
static inline void br_read_lock (enum brlock_indices idx)
{
/*
 * This causes a link-time bug message if an
 * invalid index is used:
 */
if (idx >= __BR_END)
__br_lock_usage_bug();
preempt_disable();
_raw_read_lock(&__brlock_array[smp_processor_id()][idx]);
}
/*
 * Release the read side of big-reader lock @idx (rwlock-based variant).
 *
 * Unlocks the current CPU's slot with read_unlock().
 * NOTE(review): br_read_lock() pairs preempt_disable() with the raw lock
 * primitive; presumably the non-raw read_unlock() here re-enables
 * preemption to balance it -- confirm against the spinlock headers.
 */
static inline void br_read_unlock (enum brlock_indices idx)
{
/* Same link-time index check as br_read_lock(). */
if (idx >= __BR_END)
__br_lock_usage_bug();
read_unlock(&__brlock_array[smp_processor_id()][idx]);
}
#else /* ! __BRLOCK_USE_ATOMICS */
/*
 * Take big-reader lock @idx for reading (counter-based variant).
 *
 * The reader bumps its own CPU's counter, issues a full barrier, and then
 * checks whether a writer holds the per-index spinlock.  If so it backs
 * the counter out, spins until the spinlock is free, and retries from the
 * increment.  Preemption stays disabled on return.
 */
static inline void br_read_lock (enum brlock_indices idx)
{
unsigned int *ctr;
spinlock_t *lock;
/*
 * This causes a link-time bug message if an
 * invalid index is used:
 */
if (idx >= __BR_END)
__br_lock_usage_bug();
preempt_disable();
ctr = &__brlock_array[smp_processor_id()][idx];
lock = &__br_write_locks[idx].lock;
again:
/* Announce this reader, then order the increment before the lock test. */
(*ctr)++;
mb();
if (spin_is_locked(lock)) {
/* A writer is active: withdraw and wait for it to finish. */
(*ctr)--;
wmb(); /*
 * The release of the ctr must become visible
 * to the other cpus eventually thus wmb(),
 * we don't care if spin_is_locked is reordered
 * before the releasing of the ctr.
 * However IMHO this wmb() is superfluous even in theory.
 * It would not be superfluous only if on the
 * other CPUs doing a ldl_l instead of an ldl
 * would make a difference and I don't think this is
 * the case.
 * I'd like to clarify this issue further
 * but for now this is a slow path so adding the
 * wmb() will keep us on the safe side.
 */
while (spin_is_locked(lock))
barrier();
goto again;
}
}
/*
 * Release the read side of big-reader lock @idx (counter-based variant).
 *
 * Issues a write barrier, drops this CPU's reader count, and re-enables
 * the preemption that br_read_lock() disabled.
 */
static inline void br_read_unlock (enum brlock_indices idx)
{
unsigned int *ctr;
/* Same link-time index check as br_read_lock(). */
if (idx >= __BR_END)
__br_lock_usage_bug();
ctr = &__brlock_array[smp_processor_id()][idx];
/* Order earlier stores before the counter decrement. */
wmb();
(*ctr)--;
preempt_enable();
}
#endif /* __BRLOCK_USE_ATOMICS */
/* write path not inlined - it's rare and larger */
extern void FASTCALL(__br_write_lock (enum brlock_indices idx));
extern void FASTCALL(__br_write_unlock (enum brlock_indices idx));
/*
 * Take big-reader lock @idx for writing.
 *
 * Only the index check is inlined; the locking itself is done by the
 * out-of-line __br_write_lock().
 */
static inline void br_write_lock (enum brlock_indices idx)
{
/* Invalid index: reference the bug symbol (see the read-side helpers). */
if (idx >= __BR_END)
__br_lock_usage_bug();
__br_write_lock(idx);
}
/*
 * Release the write side of big-reader lock @idx.
 *
 * Only the index check is inlined; the unlocking itself is done by the
 * out-of-line __br_write_unlock().
 */
static inline void br_write_unlock (enum brlock_indices idx)
{
/* Invalid index: reference the bug symbol (see the read-side helpers). */
if (idx >= __BR_END)
__br_lock_usage_bug();
__br_write_unlock(idx);
}
#else
/*
 * Non-SMP build: no lock storage exists, so each operation evaluates and
 * discards idx and only disables (lock) or enables (unlock) preemption.
 */
# define br_read_lock(idx) ({ (void)(idx); preempt_disable(); })
# define br_read_unlock(idx) ({ (void)(idx); preempt_enable(); })
# define br_write_lock(idx) ({ (void)(idx); preempt_disable(); })
# define br_write_unlock(idx) ({ (void)(idx); preempt_enable(); })
#endif /* CONFIG_SMP */
/*
 * Now enumerate all of the possible sw/hw IRQ protected
 * versions of the interfaces.
 *
 * Lock variants mask the interrupt source (hard IRQs or bottom halves)
 * first and then take the brlock; unlock variants release the brlock
 * first and then unmask, i.e. the exact reverse order.
 *
 * @idx:   brlock index, passed straight through to br_{read,write}_{lock,unlock}
 * @flags: lvalue that local_irq_save() fills and local_irq_restore() consumes
 *
 * The two *_unlock_irqrestore() macros used to pass an undeclared name
 * ("irx") instead of their idx parameter, so any use failed to compile;
 * they now forward idx like every other variant.
 */
#define br_read_lock_irqsave(idx, flags) \
	do { local_irq_save(flags); br_read_lock(idx); } while (0)
#define br_read_lock_irq(idx) \
	do { local_irq_disable(); br_read_lock(idx); } while (0)
#define br_read_lock_bh(idx) \
	do { local_bh_disable(); br_read_lock(idx); } while (0)
#define br_write_lock_irqsave(idx, flags) \
	do { local_irq_save(flags); br_write_lock(idx); } while (0)
#define br_write_lock_irq(idx) \
	do { local_irq_disable(); br_write_lock(idx); } while (0)
#define br_write_lock_bh(idx) \
	do { local_bh_disable(); br_write_lock(idx); } while (0)
#define br_read_unlock_irqrestore(idx, flags) \
	do { br_read_unlock(idx); local_irq_restore(flags); } while (0)
#define br_read_unlock_irq(idx) \
	do { br_read_unlock(idx); local_irq_enable(); } while (0)
#define br_read_unlock_bh(idx) \
	do { br_read_unlock(idx); local_bh_enable(); } while (0)
#define br_write_unlock_irqrestore(idx, flags) \
	do { br_write_unlock(idx); local_irq_restore(flags); } while (0)
#define br_write_unlock_irq(idx) \
	do { br_write_unlock(idx); local_irq_enable(); } while (0)
#define br_write_unlock_bh(idx) \
	do { br_write_unlock(idx); local_bh_enable(); } while (0)
#endif /* __LINUX_BRLOCK_H */
...@@ -121,6 +121,7 @@ struct ipv6hdr { ...@@ -121,6 +121,7 @@ struct ipv6hdr {
#include <linux/icmpv6.h> #include <linux/icmpv6.h>
#include <net/if_inet6.h> /* struct ipv6_mc_socklist */ #include <net/if_inet6.h> /* struct ipv6_mc_socklist */
#include <linux/tcp.h> #include <linux/tcp.h>
#include <linux/udp.h>
/* /*
This structure contains results of exthdrs parsing This structure contains results of exthdrs parsing
...@@ -178,6 +179,11 @@ struct ipv6_pinfo { ...@@ -178,6 +179,11 @@ struct ipv6_pinfo {
struct ipv6_txoptions *opt; struct ipv6_txoptions *opt;
struct sk_buff *pktoptions; struct sk_buff *pktoptions;
struct {
struct ipv6_txoptions *opt;
struct rt6_info *rt;
struct flowi *fl;
} cork;
}; };
struct raw6_opt { struct raw6_opt {
...@@ -200,6 +206,7 @@ struct udp6_sock { ...@@ -200,6 +206,7 @@ struct udp6_sock {
struct sock sk; struct sock sk;
struct ipv6_pinfo *pinet6; struct ipv6_pinfo *pinet6;
struct inet_opt inet; struct inet_opt inet;
struct udp_opt udp;
struct ipv6_pinfo inet6; struct ipv6_pinfo inet6;
}; };
......
...@@ -456,7 +456,7 @@ struct packet_type ...@@ -456,7 +456,7 @@ struct packet_type
int (*func) (struct sk_buff *, struct net_device *, int (*func) (struct sk_buff *, struct net_device *,
struct packet_type *); struct packet_type *);
void *data; /* Private to the packet type */ void *data; /* Private to the packet type */
struct packet_type *next; struct list_head list;
}; };
...@@ -472,6 +472,7 @@ extern int netdev_boot_setup_check(struct net_device *dev); ...@@ -472,6 +472,7 @@ extern int netdev_boot_setup_check(struct net_device *dev);
extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr); extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr);
extern void dev_add_pack(struct packet_type *pt); extern void dev_add_pack(struct packet_type *pt);
extern void dev_remove_pack(struct packet_type *pt); extern void dev_remove_pack(struct packet_type *pt);
extern void __dev_remove_pack(struct packet_type *pt);
extern int dev_get(const char *name); extern int dev_get(const char *name);
extern struct net_device *dev_get_by_flags(unsigned short flags, extern struct net_device *dev_get_by_flags(unsigned short flags,
unsigned short mask); unsigned short mask);
......
...@@ -792,6 +792,15 @@ static inline int skb_pagelen(const struct sk_buff *skb) ...@@ -792,6 +792,15 @@ static inline int skb_pagelen(const struct sk_buff *skb)
return len + skb_headlen(skb); return len + skb_headlen(skb);
} }
/*
 * Describe page fragment number @i of @skb.
 *
 * Stores @page, @off and @size into slot @i of the skb's shared-info
 * fragment array and sets nr_frags to @i + 1, making slot @i the last
 * counted fragment.  No bounds check is made on @i.
 */
static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
{
	skb_frag_t *slot = skb_shinfo(skb)->frags + i;

	slot->size = size;
	slot->page_offset = off;
	slot->page = page;
	skb_shinfo(skb)->nr_frags = i + 1;
}
#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) \ #define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) \
BUG(); } while (0) BUG(); } while (0)
#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) \ #define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) \
......
...@@ -316,6 +316,26 @@ extern int ip6_build_xmit(struct sock *sk, ...@@ -316,6 +316,26 @@ extern int ip6_build_xmit(struct sock *sk,
struct ipv6_txoptions *opt, struct ipv6_txoptions *opt,
int hlimit, int flags); int hlimit, int flags);
extern int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
void *from,
int length,
int transhdrlen,
int hlimit,
struct ipv6_txoptions *opt,
struct flowi *fl,
struct rt6_info *rt,
unsigned int flags);
extern int ip6_push_pending_frames(struct sock *sk);
extern void ip6_flush_pending_frames(struct sock *sk);
extern int ip6_dst_lookup(struct sock *sk,
struct dst_entry **dst,
struct flowi *fl,
struct in6_addr **saddr);
/* /*
* skb processing functions * skb processing functions
*/ */
......
...@@ -123,6 +123,12 @@ struct xfrm_state ...@@ -123,6 +123,12 @@ struct xfrm_state
/* Data for encapsulator */ /* Data for encapsulator */
struct xfrm_encap_tmpl *encap; struct xfrm_encap_tmpl *encap;
/* IPComp needs an IPIP tunnel for handling uncompressed packets */
struct xfrm_state *tunnel;
/* If a tunnel, number of users + 1 */
atomic_t tunnel_users;
/* State for replay detection */ /* State for replay detection */
struct xfrm_replay_state replay; struct xfrm_replay_state replay;
...@@ -196,6 +202,8 @@ extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); ...@@ -196,6 +202,8 @@ extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo);
extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
extern void xfrm_state_delete_tunnel(struct xfrm_state *x);
struct xfrm_decap_state; struct xfrm_decap_state;
struct xfrm_type struct xfrm_type
{ {
...@@ -699,6 +707,11 @@ xfrm_state_addr_check(struct xfrm_state *x, ...@@ -699,6 +707,11 @@ xfrm_state_addr_check(struct xfrm_state *x,
return 0; return 0;
} }
/*
 * Return the current value of x->tunnel_users.
 *
 * NOTE(review): the field is described as "If a tunnel, number of
 * users + 1", so a non-zero result presumably marks a kernel-owned
 * tunnel state -- confirm with the callers.
 */
static inline int xfrm_state_kern(struct xfrm_state *x)
{
	int users = atomic_read(&x->tunnel_users);

	return users;
}
/* /*
* xfrm algorithm information * xfrm algorithm information
*/ */
......
...@@ -40,7 +40,6 @@ ...@@ -40,7 +40,6 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/capability.h> #include <linux/capability.h>
#include <linux/highuid.h> #include <linux/highuid.h>
#include <linux/brlock.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/tty.h> #include <linux/tty.h>
...@@ -429,17 +428,6 @@ EXPORT_SYMBOL(del_timer_sync); ...@@ -429,17 +428,6 @@ EXPORT_SYMBOL(del_timer_sync);
#endif #endif
EXPORT_SYMBOL(mod_timer); EXPORT_SYMBOL(mod_timer);
#ifdef CONFIG_SMP
/* Big-Reader lock implementation */
EXPORT_SYMBOL(__brlock_array);
#ifndef __BRLOCK_USE_ATOMICS
EXPORT_SYMBOL(__br_write_locks);
#endif
EXPORT_SYMBOL(__br_write_lock);
EXPORT_SYMBOL(__br_write_unlock);
#endif
#ifdef HAVE_DISABLE_HLT #ifdef HAVE_DISABLE_HLT
EXPORT_SYMBOL(disable_hlt); EXPORT_SYMBOL(disable_hlt);
EXPORT_SYMBOL(enable_hlt); EXPORT_SYMBOL(enable_hlt);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
L_TARGET := lib.a L_TARGET := lib.a
obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \ obj-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
kobject.o idr.o kobject.o idr.o
......
/*
*
* linux/lib/brlock.c
*
* 'Big Reader' read-write spinlocks. See linux/brlock.h for details.
*
* Copyright 2000, Ingo Molnar <mingo@redhat.com>
* Copyright 2000, David S. Miller <davem@redhat.com>
*/
#include <linux/config.h>
#ifdef CONFIG_SMP
#include <linux/sched.h>
#include <linux/brlock.h>
#ifdef __BRLOCK_USE_ATOMICS
/*
 * Reader slots for the rwlock-based variant, indexed [cpu][lock index];
 * every slot starts out unlocked.
 */
brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] =
{ [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = RW_LOCK_UNLOCKED } };
/*
 * Write-lock big-reader lock @idx (rwlock-based variant).
 *
 * Disables preemption, then write-locks the @idx slot of every CPU in
 * ascending CPU order.  Preemption stays disabled until
 * __br_write_unlock().
 */
void __br_write_lock (enum brlock_indices idx)
{
	int cpu = 0;

	preempt_disable();
	while (cpu < NR_CPUS) {
		_raw_write_lock(&__brlock_array[cpu][idx]);
		cpu++;
	}
}
/*
 * Drop the write side of big-reader lock @idx (rwlock-based variant).
 *
 * Write-unlocks the @idx slot of every CPU in ascending CPU order and
 * then re-enables the preemption disabled by __br_write_lock().
 */
void __br_write_unlock (enum brlock_indices idx)
{
	int cpu = 0;

	while (cpu < NR_CPUS) {
		_raw_write_unlock(&__brlock_array[cpu][idx]);
		cpu++;
	}
	preempt_enable();
}
#else /* ! __BRLOCK_USE_ATOMICS */
/*
 * Counter-based variant: per-CPU reader counts, indexed [cpu][lock index],
 * all starting at zero (no readers).
 */
brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] =
{ [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = 0 } };
/* One writer spinlock per lock index, all initially unlocked. */
struct br_wrlock __br_write_locks[__BR_IDX_MAX] =
{ [0 ... __BR_IDX_MAX-1] = { SPIN_LOCK_UNLOCKED } };
/*
 * Write-lock big-reader lock @idx (counter-based variant).
 *
 * Takes the per-index writer spinlock and then scans every CPU's reader
 * counter.  If any counter is non-zero, the spinlock is dropped again and
 * the whole sequence is retried.  On return the spinlock is held, all
 * counters were observed as zero, and preemption is disabled.
 */
void __br_write_lock (enum brlock_indices idx)
{
int i;
preempt_disable();
again:
_raw_spin_lock(&__br_write_locks[idx].lock);
for (i = 0; i < NR_CPUS; i++)
if (__brlock_array[i][idx] != 0) {
/* An active reader exists: release the lock and retry from scratch. */
_raw_spin_unlock(&__br_write_locks[idx].lock);
barrier();
cpu_relax();
goto again;
}
}
/*
 * Drop the write side of big-reader lock @idx (counter-based variant) by
 * releasing the per-index writer spinlock.
 *
 * NOTE(review): __br_write_lock() calls preempt_disable() explicitly and
 * there is no explicit preempt_enable() here; presumably the non-raw
 * spin_unlock() provides it -- confirm against the spinlock headers.
 */
void __br_write_unlock (enum brlock_indices idx)
{
spin_unlock(&__br_write_locks[idx].lock);
}
#endif /* __BRLOCK_USE_ATOMICS */
#endif /* CONFIG_SMP */
...@@ -20,25 +20,19 @@ ...@@ -20,25 +20,19 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include "br_private.h" #include "br_private.h"
static __inline__ unsigned long __timeout(struct net_bridge *br) /* if topology_changing then use forward_delay (default 15 sec)
* otherwise keep longer (default 5 minutes)
*/
static __inline__ unsigned long hold_time(const struct net_bridge *br)
{ {
unsigned long timeout; return br->topology_change ? br->forward_delay : br->ageing_time;
timeout = jiffies - br->ageing_time;
if (br->topology_change)
timeout = jiffies - br->forward_delay;
return timeout;
} }
static __inline__ int has_expired(struct net_bridge *br, static __inline__ int has_expired(const struct net_bridge *br,
struct net_bridge_fdb_entry *fdb) const struct net_bridge_fdb_entry *fdb)
{ {
if (!fdb->is_static && return !fdb->is_static
time_before_eq(fdb->ageing_timer, __timeout(br))) && time_before_eq(fdb->ageing_timer + hold_time(br), jiffies);
return 1;
return 0;
} }
static __inline__ void copy_fdb(struct __fdb_entry *ent, static __inline__ void copy_fdb(struct __fdb_entry *ent,
...@@ -52,7 +46,7 @@ static __inline__ void copy_fdb(struct __fdb_entry *ent, ...@@ -52,7 +46,7 @@ static __inline__ void copy_fdb(struct __fdb_entry *ent,
: ((jiffies - f->ageing_timer) * USER_HZ) / HZ; : ((jiffies - f->ageing_timer) * USER_HZ) / HZ;
} }
static __inline__ int br_mac_hash(unsigned char *mac) static __inline__ int br_mac_hash(const unsigned char *mac)
{ {
unsigned long x; unsigned long x;
...@@ -68,7 +62,14 @@ static __inline__ int br_mac_hash(unsigned char *mac) ...@@ -68,7 +62,14 @@ static __inline__ int br_mac_hash(unsigned char *mac)
return x & (BR_HASH_SIZE - 1); return x & (BR_HASH_SIZE - 1);
} }
void br_fdb_changeaddr(struct net_bridge_port *p, unsigned char *newaddr) static __inline__ void fdb_delete(struct net_bridge_fdb_entry *f)
{
hlist_del(&f->hlist);
list_del(&f->age_list);
br_fdb_put(f);
}
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
{ {
struct net_bridge *br; struct net_bridge *br;
int i; int i;
...@@ -98,25 +99,29 @@ void br_fdb_changeaddr(struct net_bridge_port *p, unsigned char *newaddr) ...@@ -98,25 +99,29 @@ void br_fdb_changeaddr(struct net_bridge_port *p, unsigned char *newaddr)
write_unlock_bh(&br->hash_lock); write_unlock_bh(&br->hash_lock);
} }
void br_fdb_cleanup(struct net_bridge *br) void br_fdb_cleanup(unsigned long _data)
{ {
int i; struct net_bridge *br = (struct net_bridge *)_data;
unsigned long timeout; struct list_head *l, *n;
unsigned long delay;
timeout = __timeout(br);
write_lock_bh(&br->hash_lock); write_lock_bh(&br->hash_lock);
for (i=0;i<BR_HASH_SIZE;i++) { delay = hold_time(br);
struct hlist_node *h, *g;
hlist_for_each_safe(h, g, &br->hash[i]) { list_for_each_safe(l, n, &br->age_list) {
struct net_bridge_fdb_entry *f struct net_bridge_fdb_entry *f
= hlist_entry(h, struct net_bridge_fdb_entry, hlist); = list_entry(l, struct net_bridge_fdb_entry, age_list);
if (!f->is_static && unsigned long expires = f->ageing_timer + delay;
time_before_eq(f->ageing_timer, timeout)) {
hlist_del(&f->hlist); if (time_before_eq(expires, jiffies)) {
br_fdb_put(f); if (!f->is_static) {
pr_debug("expire age %lu jiffies %lu\n",
f->ageing_timer, jiffies);
fdb_delete(f);
} }
} else {
mod_timer(&br->gc_timer, expires);
break;
} }
} }
write_unlock_bh(&br->hash_lock); write_unlock_bh(&br->hash_lock);
...@@ -134,8 +139,7 @@ void br_fdb_delete_by_port(struct net_bridge *br, struct net_bridge_port *p) ...@@ -134,8 +139,7 @@ void br_fdb_delete_by_port(struct net_bridge *br, struct net_bridge_port *p)
struct net_bridge_fdb_entry *f struct net_bridge_fdb_entry *f
= hlist_entry(h, struct net_bridge_fdb_entry, hlist); = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
if (f->dst == p) { if (f->dst == p) {
hlist_del(&f->hlist); fdb_delete(f);
br_fdb_put(f);
} }
} }
} }
...@@ -237,55 +241,46 @@ int br_fdb_get_entries(struct net_bridge *br, ...@@ -237,55 +241,46 @@ int br_fdb_get_entries(struct net_bridge *br,
return num; return num;
} }
static __inline__ void __fdb_possibly_replace(struct net_bridge_fdb_entry *fdb, void br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
struct net_bridge_port *source, const unsigned char *addr, int is_local)
int is_local)
{
if (!fdb->is_static || is_local) {
fdb->dst = source;
fdb->is_local = is_local;
fdb->is_static = is_local;
fdb->ageing_timer = jiffies;
}
}
void br_fdb_insert(struct net_bridge *br,
struct net_bridge_port *source,
unsigned char *addr,
int is_local)
{ {
struct hlist_node *h; struct hlist_node *h;
struct net_bridge_fdb_entry *fdb; struct net_bridge_fdb_entry *fdb;
int hash; int hash = br_mac_hash(addr);
hash = br_mac_hash(addr);
write_lock_bh(&br->hash_lock); write_lock_bh(&br->hash_lock);
hlist_for_each(h, &br->hash[hash]) { hlist_for_each(h, &br->hash[hash]) {
fdb = hlist_entry(h, struct net_bridge_fdb_entry, hlist); fdb = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
if (!fdb->is_local && if (!fdb->is_local &&
!memcmp(fdb->addr.addr, addr, ETH_ALEN)) { !memcmp(fdb->addr.addr, addr, ETH_ALEN)) {
__fdb_possibly_replace(fdb, source, is_local); if (likely(!fdb->is_static || is_local)) {
write_unlock_bh(&br->hash_lock); /* move to end of age list */
return; list_del(&fdb->age_list);
goto update;
}
goto out;
} }
} }
fdb = kmalloc(sizeof(*fdb), GFP_ATOMIC); fdb = kmalloc(sizeof(*fdb), GFP_ATOMIC);
if (fdb == NULL) { if (fdb == NULL)
write_unlock_bh(&br->hash_lock); goto out;
return;
}
memcpy(fdb->addr.addr, addr, ETH_ALEN); memcpy(fdb->addr.addr, addr, ETH_ALEN);
atomic_set(&fdb->use_count, 1); atomic_set(&fdb->use_count, 1);
hlist_add_head(&fdb->hlist, &br->hash[hash]);
if (!timer_pending(&br->gc_timer)) {
br->gc_timer.expires = jiffies + hold_time(br);
add_timer(&br->gc_timer);
}
update:
fdb->dst = source; fdb->dst = source;
fdb->is_local = is_local; fdb->is_local = is_local;
fdb->is_static = is_local; fdb->is_static = is_local;
fdb->ageing_timer = jiffies; fdb->ageing_timer = jiffies;
list_add_tail(&fdb->age_list, &br->age_list);
hlist_add_head(&fdb->hlist, &br->hash[hash]); out:
write_unlock_bh(&br->hash_lock); write_unlock_bh(&br->hash_lock);
} }
...@@ -84,8 +84,6 @@ static struct net_bridge *new_nb(const char *name) ...@@ -84,8 +84,6 @@ static struct net_bridge *new_nb(const char *name)
memset(br, 0, sizeof(*br)); memset(br, 0, sizeof(*br));
dev = &br->dev; dev = &br->dev;
init_timer(&br->tick);
strncpy(dev->name, name, IFNAMSIZ); strncpy(dev->name, name, IFNAMSIZ);
dev->priv = br; dev->priv = br;
dev->priv_flags = IFF_EBRIDGE; dev->priv_flags = IFF_EBRIDGE;
...@@ -109,12 +107,10 @@ static struct net_bridge *new_nb(const char *name) ...@@ -109,12 +107,10 @@ static struct net_bridge *new_nb(const char *name)
br->bridge_forward_delay = br->forward_delay = 15 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ;
br->topology_change = 0; br->topology_change = 0;
br->topology_change_detected = 0; br->topology_change_detected = 0;
br_timer_clear(&br->hello_timer);
br_timer_clear(&br->tcn_timer);
br_timer_clear(&br->topology_change_timer);
br->ageing_time = 300 * HZ; br->ageing_time = 300 * HZ;
br->gc_interval = 4 * HZ; INIT_LIST_HEAD(&br->age_list);
br_stp_timer_init(br);
return br; return br;
} }
......
...@@ -32,9 +32,10 @@ static inline unsigned long ticks_to_user(unsigned long tick) ...@@ -32,9 +32,10 @@ static inline unsigned long ticks_to_user(unsigned long tick)
} }
/* Report time remaining in user HZ */ /* Report time remaining in user HZ */
static unsigned long timer_residue(const struct br_timer *timer) static unsigned long timer_residue(const struct timer_list *timer)
{ {
return ticks_to_user(timer->running ? (jiffies - timer->expires) : 0); return ticks_to_user(timer_pending(timer)
? (timer->expires - jiffies) : 0);
} }
static int br_ioctl_device(struct net_bridge *br, static int br_ioctl_device(struct net_bridge *br,
...@@ -87,7 +88,6 @@ static int br_ioctl_device(struct net_bridge *br, ...@@ -87,7 +88,6 @@ static int br_ioctl_device(struct net_bridge *br,
b.root_port = br->root_port; b.root_port = br->root_port;
b.stp_enabled = br->stp_enabled; b.stp_enabled = br->stp_enabled;
b.ageing_time = ticks_to_user(br->ageing_time); b.ageing_time = ticks_to_user(br->ageing_time);
b.gc_interval = ticks_to_user(br->gc_interval);
b.hello_timer_value = timer_residue(&br->hello_timer); b.hello_timer_value = timer_residue(&br->hello_timer);
b.tcn_timer_value = timer_residue(&br->tcn_timer); b.tcn_timer_value = timer_residue(&br->tcn_timer);
b.topology_change_timer_value = timer_residue(&br->topology_change_timer); b.topology_change_timer_value = timer_residue(&br->topology_change_timer);
...@@ -146,8 +146,7 @@ static int br_ioctl_device(struct net_bridge *br, ...@@ -146,8 +146,7 @@ static int br_ioctl_device(struct net_bridge *br,
br->ageing_time = user_to_ticks(arg0); br->ageing_time = user_to_ticks(arg0);
return 0; return 0;
case BRCTL_SET_GC_INTERVAL: case BRCTL_SET_GC_INTERVAL: /* no longer used */
br->gc_interval = user_to_ticks(arg0);
return 0; return 0;
case BRCTL_GET_PORT_INFO: case BRCTL_GET_PORT_INFO:
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/miscdevice.h> #include <linux/miscdevice.h>
#include <linux/if_bridge.h> #include <linux/if_bridge.h>
#include "br_private_timer.h"
#define BR_HASH_BITS 8 #define BR_HASH_BITS 8
#define BR_HASH_SIZE (1 << BR_HASH_BITS) #define BR_HASH_SIZE (1 << BR_HASH_BITS)
...@@ -44,10 +43,11 @@ struct mac_addr ...@@ -44,10 +43,11 @@ struct mac_addr
struct net_bridge_fdb_entry struct net_bridge_fdb_entry
{ {
struct hlist_node hlist; struct hlist_node hlist;
atomic_t use_count;
mac_addr addr;
struct net_bridge_port *dst; struct net_bridge_port *dst;
struct list_head age_list;
atomic_t use_count;
unsigned long ageing_timer; unsigned long ageing_timer;
mac_addr addr;
unsigned is_local:1; unsigned is_local:1;
unsigned is_static:1; unsigned is_static:1;
}; };
...@@ -71,10 +71,9 @@ struct net_bridge_port ...@@ -71,10 +71,9 @@ struct net_bridge_port
unsigned config_pending:1; unsigned config_pending:1;
int priority; int priority;
struct br_timer forward_delay_timer; struct timer_list forward_delay_timer;
struct br_timer hold_timer; struct timer_list hold_timer;
struct br_timer message_age_timer; struct timer_list message_age_timer;
struct rcu_head rcu; struct rcu_head rcu;
}; };
...@@ -86,7 +85,7 @@ struct net_bridge ...@@ -86,7 +85,7 @@ struct net_bridge
struct net_device_stats statistics; struct net_device_stats statistics;
rwlock_t hash_lock; rwlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE]; struct hlist_head hash[BR_HASH_SIZE];
struct timer_list tick; struct list_head age_list;
/* STP */ /* STP */
bridge_id designated_root; bridge_id designated_root;
...@@ -103,13 +102,12 @@ struct net_bridge ...@@ -103,13 +102,12 @@ struct net_bridge
unsigned topology_change:1; unsigned topology_change:1;
unsigned topology_change_detected:1; unsigned topology_change_detected:1;
struct br_timer hello_timer; struct timer_list hello_timer;
struct br_timer tcn_timer; struct timer_list tcn_timer;
struct br_timer topology_change_timer; struct timer_list topology_change_timer;
struct br_timer gc_timer; struct timer_list gc_timer;
int ageing_time; int ageing_time;
int gc_interval;
}; };
extern struct notifier_block br_device_notifier; extern struct notifier_block br_device_notifier;
...@@ -128,8 +126,8 @@ extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev); ...@@ -128,8 +126,8 @@ extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
/* br_fdb.c */ /* br_fdb.c */
extern void br_fdb_changeaddr(struct net_bridge_port *p, extern void br_fdb_changeaddr(struct net_bridge_port *p,
unsigned char *newaddr); const unsigned char *newaddr);
extern void br_fdb_cleanup(struct net_bridge *br); extern void br_fdb_cleanup(unsigned long arg);
extern void br_fdb_delete_by_port(struct net_bridge *br, extern void br_fdb_delete_by_port(struct net_bridge *br,
struct net_bridge_port *p); struct net_bridge_port *p);
extern struct net_bridge_fdb_entry *br_fdb_get(struct net_bridge *br, extern struct net_bridge_fdb_entry *br_fdb_get(struct net_bridge *br,
...@@ -141,7 +139,7 @@ extern int br_fdb_get_entries(struct net_bridge *br, ...@@ -141,7 +139,7 @@ extern int br_fdb_get_entries(struct net_bridge *br,
int offset); int offset);
extern void br_fdb_insert(struct net_bridge *br, extern void br_fdb_insert(struct net_bridge *br,
struct net_bridge_port *source, struct net_bridge_port *source,
unsigned char *addr, const unsigned char *addr,
int is_local); int is_local);
/* br_forward.c */ /* br_forward.c */
...@@ -188,10 +186,10 @@ extern int br_netfilter_init(void); ...@@ -188,10 +186,10 @@ extern int br_netfilter_init(void);
extern void br_netfilter_fini(void); extern void br_netfilter_fini(void);
/* br_stp.c */ /* br_stp.c */
extern void br_log_state(const struct net_bridge_port *p);
extern struct net_bridge_port *br_get_port(struct net_bridge *br, extern struct net_bridge_port *br_get_port(struct net_bridge *br,
int port_no); int port_no);
extern void br_init_port(struct net_bridge_port *p); extern void br_init_port(struct net_bridge_port *p);
extern port_id br_make_port_id(struct net_bridge_port *p);
extern void br_become_designated_port(struct net_bridge_port *p); extern void br_become_designated_port(struct net_bridge_port *p);
/* br_stp_if.c */ /* br_stp_if.c */
...@@ -210,4 +208,8 @@ extern void br_stp_set_path_cost(struct net_bridge_port *p, ...@@ -210,4 +208,8 @@ extern void br_stp_set_path_cost(struct net_bridge_port *p,
/* br_stp_bpdu.c */ /* br_stp_bpdu.c */
extern void br_stp_handle_bpdu(struct sk_buff *skb); extern void br_stp_handle_bpdu(struct sk_buff *skb);
/* br_stp_timer.c */
extern void br_stp_timer_init(struct net_bridge *br);
extern void br_stp_port_timer_init(struct net_bridge_port *p);
#endif #endif
...@@ -47,7 +47,6 @@ extern void br_configuration_update(struct net_bridge *); ...@@ -47,7 +47,6 @@ extern void br_configuration_update(struct net_bridge *);
extern void br_port_state_selection(struct net_bridge *); extern void br_port_state_selection(struct net_bridge *);
extern void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu); extern void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu);
extern void br_received_tcn_bpdu(struct net_bridge_port *p); extern void br_received_tcn_bpdu(struct net_bridge_port *p);
extern void br_tick(unsigned long __data);
extern void br_transmit_config(struct net_bridge_port *p); extern void br_transmit_config(struct net_bridge_port *p);
extern void br_transmit_tcn(struct net_bridge *br); extern void br_transmit_tcn(struct net_bridge *br);
extern void br_topology_change_detection(struct net_bridge *br); extern void br_topology_change_detection(struct net_bridge *br);
......
/*
* Linux ethernet bridge
*
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
* $Id: br_private_timer.h,v 1.1 2000/02/18 16:47:13 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _BR_PRIVATE_TIMER_H
#define _BR_PRIVATE_TIMER_H
/*
 * Minimal bridge timer: a running flag plus a jiffies-based timestamp.
 *
 * The structure carries no callback; users test it with
 * br_timer_has_expired().
 */
struct br_timer {
	/* Non-zero while armed: set by br_timer_set(), reset by br_timer_clear(). */
	int running;
	/*
	 * Value handed to br_timer_set().  br_timer_has_expired() adds the
	 * timeout to this field, so it serves as the reference time from
	 * which the timeout is measured.
	 */
	unsigned long expires;
};
/*
 * br_timer_clear - stop a bridge timer.
 * @t: timer to stop
 *
 * Only the running flag is reset; the stored timestamp is left as is.
 *
 * Declared static inline (not extern inline) so that each file including
 * this header owns a usable definition even when the compiler does not
 * inline the call.
 */
static inline void br_timer_clear(struct br_timer *t)
{
	t->running = 0;
}
/*
 * br_timer_get_residue - time elapsed since the timer's stored timestamp.
 * @t: timer to query
 *
 * Returns jiffies minus the value recorded by br_timer_set() when the
 * timer is running, and 0 when it is stopped.  The subtraction is done on
 * unsigned long, so it wraps modulo the word size.
 *
 * Declared static inline (not extern inline) so that each file including
 * this header owns a usable definition even when the compiler does not
 * inline the call.
 */
static inline unsigned long br_timer_get_residue(struct br_timer *t)
{
	if (!t->running)
		return 0;

	return jiffies - t->expires;
}
/*
 * br_timer_set - arm a bridge timer.
 * @t: timer to arm
 * @x: timestamp to record in t->expires
 *
 * Stores @x and marks the timer as running.  The timeout itself is not
 * stored here; it is supplied later to br_timer_has_expired().
 *
 * Declared static inline (not extern inline) so that each file including
 * this header owns a usable definition even when the compiler does not
 * inline the call.
 */
static inline void br_timer_set(struct br_timer *t, unsigned long x)
{
	t->expires = x;
	t->running = 1;
}
/*
 * br_timer_is_running - report whether a bridge timer is armed.
 * @t: timer to query
 *
 * Returns the running flag: non-zero after br_timer_set(), zero after
 * br_timer_clear().
 *
 * Declared static inline (not extern inline) so that each file including
 * this header owns a usable definition even when the compiler does not
 * inline the call.
 */
static inline int br_timer_is_running(struct br_timer *t)
{
	return t->running;
}
/*
 * br_timer_has_expired - test whether a running timer has timed out.
 * @t:  timer to test
 * @to: timeout, added to the timestamp recorded by br_timer_set()
 *
 * Returns non-zero only if the timer is running and jiffies has reached
 * t->expires + @to (compared with time_after_eq()).  A stopped timer
 * never reports expiry.
 *
 * Declared static inline (not extern inline) so that each file including
 * this header owns a usable definition even when the compiler does not
 * inline the call.
 */
static inline int br_timer_has_expired(struct br_timer *t, unsigned long to)
{
	if (!t->running)
		return 0;

	return time_after_eq(jiffies, t->expires + to);
}
#endif
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
* as published by the Free Software Foundation; either version * as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version. * 2 of the License, or (at your option) any later version.
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/if_bridge.h> #include <linux/if_bridge.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
...@@ -20,6 +19,18 @@ ...@@ -20,6 +19,18 @@
#include "br_private.h" #include "br_private.h"
#include "br_private_stp.h" #include "br_private_stp.h"
/*
 * Human-readable names for the STP port states, indexed by p->state in
 * br_log_state().
 *
 * There must be one entry per BR_STATE_* value, in numeric order.  The
 * order below assumes the numbering in <linux/if_bridge.h>
 * (disabled, listening, learning, forwarding, blocking = 0..4); keep the
 * two in sync.  "listening" must be present: ports are logged right
 * after entering BR_STATE_LISTENING, and without it every later name is
 * shifted and the highest state indexes past the end of the table.
 */
static const char *br_port_state_names[] = {
	"disabled",	/* BR_STATE_DISABLED */
	"listening",	/* BR_STATE_LISTENING */
	"learning",	/* BR_STATE_LEARNING */
	"forwarding",	/* BR_STATE_FORWARDING */
	"blocking",	/* BR_STATE_BLOCKING */
};
/*
 * br_log_state - log the STP state a port is currently in.
 * @p: bridge port to report
 *
 * Prints the bridge device name, the port number, the port's device name
 * and the name looked up from br_port_state_names[] using p->state.
 * The port is only read, never modified.
 */
void br_log_state(const struct net_bridge_port *p)
{
	const char *state_name = br_port_state_names[p->state];

	pr_info("%s: port %d(%s) entering %s state\n",
		p->br->dev.name,
		p->port_no,
		p->dev->name,
		state_name);
}
/* called under bridge lock */ /* called under bridge lock */
struct net_bridge_port *br_get_port(struct net_bridge *br, int port_no) struct net_bridge_port *br_get_port(struct net_bridge *br, int port_no)
{ {
...@@ -34,7 +45,8 @@ struct net_bridge_port *br_get_port(struct net_bridge *br, int port_no) ...@@ -34,7 +45,8 @@ struct net_bridge_port *br_get_port(struct net_bridge *br, int port_no)
} }
/* called under bridge lock */ /* called under bridge lock */
static int br_should_become_root_port(struct net_bridge_port *p, int root_port) static int br_should_become_root_port(const struct net_bridge_port *p,
int root_port)
{ {
struct net_bridge *br; struct net_bridge *br;
struct net_bridge_port *rp; struct net_bridge_port *rp;
...@@ -116,9 +128,12 @@ void br_become_root_bridge(struct net_bridge *br) ...@@ -116,9 +128,12 @@ void br_become_root_bridge(struct net_bridge *br)
br->hello_time = br->bridge_hello_time; br->hello_time = br->bridge_hello_time;
br->forward_delay = br->bridge_forward_delay; br->forward_delay = br->bridge_forward_delay;
br_topology_change_detection(br); br_topology_change_detection(br);
br_timer_clear(&br->tcn_timer); del_timer(&br->tcn_timer);
if (br->dev.flags & IFF_UP) {
br_config_bpdu_generation(br); br_config_bpdu_generation(br);
br_timer_set(&br->hello_timer, jiffies); mod_timer(&br->hello_timer, jiffies + br->hello_time);
}
} }
/* called under bridge lock */ /* called under bridge lock */
...@@ -127,7 +142,8 @@ void br_transmit_config(struct net_bridge_port *p) ...@@ -127,7 +142,8 @@ void br_transmit_config(struct net_bridge_port *p)
struct br_config_bpdu bpdu; struct br_config_bpdu bpdu;
struct net_bridge *br; struct net_bridge *br;
if (br_timer_is_running(&p->hold_timer)) {
if (timer_pending(&p->hold_timer)) {
p->config_pending = 1; p->config_pending = 1;
return; return;
} }
...@@ -142,12 +158,11 @@ void br_transmit_config(struct net_bridge_port *p) ...@@ -142,12 +158,11 @@ void br_transmit_config(struct net_bridge_port *p)
bpdu.port_id = p->port_id; bpdu.port_id = p->port_id;
bpdu.message_age = 0; bpdu.message_age = 0;
if (!br_is_root_bridge(br)) { if (!br_is_root_bridge(br)) {
struct net_bridge_port *root; struct net_bridge_port *root
unsigned long age; = br_get_port(br, br->root_port);
bpdu.max_age = root->message_age_timer.expires - jiffies;
root = br_get_port(br, br->root_port); if (bpdu.max_age <= 0) bpdu.max_age = 1;
age = br_timer_get_residue(&root->message_age_timer) + 1;
bpdu.message_age = age;
} }
bpdu.max_age = br->max_age; bpdu.max_age = br->max_age;
bpdu.hello_time = br->hello_time; bpdu.hello_time = br->hello_time;
...@@ -157,22 +172,26 @@ void br_transmit_config(struct net_bridge_port *p) ...@@ -157,22 +172,26 @@ void br_transmit_config(struct net_bridge_port *p)
p->topology_change_ack = 0; p->topology_change_ack = 0;
p->config_pending = 0; p->config_pending = 0;
br_timer_set(&p->hold_timer, jiffies);
mod_timer(&p->hold_timer, jiffies + BR_HOLD_TIME);
} }
/* called under bridge lock */ /* called under bridge lock */
static void br_record_config_information(struct net_bridge_port *p, struct br_config_bpdu *bpdu) static inline void br_record_config_information(struct net_bridge_port *p,
const struct br_config_bpdu *bpdu)
{ {
p->designated_root = bpdu->root; p->designated_root = bpdu->root;
p->designated_cost = bpdu->root_path_cost; p->designated_cost = bpdu->root_path_cost;
p->designated_bridge = bpdu->bridge_id; p->designated_bridge = bpdu->bridge_id;
p->designated_port = bpdu->port_id; p->designated_port = bpdu->port_id;
br_timer_set(&p->message_age_timer, jiffies - bpdu->message_age); mod_timer(&p->message_age_timer, jiffies
+ (p->br->max_age - bpdu->message_age));
} }
/* called under bridge lock */ /* called under bridge lock */
static void br_record_config_timeout_values(struct net_bridge *br, struct br_config_bpdu *bpdu) static inline void br_record_config_timeout_values(struct net_bridge *br,
const struct br_config_bpdu *bpdu)
{ {
br->max_age = bpdu->max_age; br->max_age = bpdu->max_age;
br->hello_time = bpdu->hello_time; br->hello_time = bpdu->hello_time;
...@@ -187,7 +206,7 @@ void br_transmit_tcn(struct net_bridge *br) ...@@ -187,7 +206,7 @@ void br_transmit_tcn(struct net_bridge *br)
} }
/* called under bridge lock */ /* called under bridge lock */
static int br_should_become_designated_port(struct net_bridge_port *p) static int br_should_become_designated_port(const struct net_bridge_port *p)
{ {
struct net_bridge *br; struct net_bridge *br;
int t; int t;
...@@ -261,25 +280,28 @@ static int br_supersedes_port_info(struct net_bridge_port *p, struct br_config_b ...@@ -261,25 +280,28 @@ static int br_supersedes_port_info(struct net_bridge_port *p, struct br_config_b
} }
/* called under bridge lock */ /* called under bridge lock */
static void br_topology_change_acknowledged(struct net_bridge *br) static inline void br_topology_change_acknowledged(struct net_bridge *br)
{ {
br->topology_change_detected = 0; br->topology_change_detected = 0;
br_timer_clear(&br->tcn_timer); del_timer(&br->tcn_timer);
} }
/* called under bridge lock */ /* called under bridge lock */
void br_topology_change_detection(struct net_bridge *br) void br_topology_change_detection(struct net_bridge *br)
{ {
printk(KERN_INFO "%s: topology change detected", br->dev.name); if (!(br->dev.flags & IFF_UP))
return;
pr_info("%s: topology change detected", br->dev.name);
if (br_is_root_bridge(br)) { if (br_is_root_bridge(br)) {
printk(", propagating"); printk(", propagating");
br->topology_change = 1; br->topology_change = 1;
br_timer_set(&br->topology_change_timer, jiffies); mod_timer(&br->topology_change_timer, jiffies
+ br->bridge_forward_delay + br->bridge_max_age);
} else if (!br->topology_change_detected) { } else if (!br->topology_change_detected) {
printk(", sending tcn bpdu"); printk(", sending tcn bpdu");
br_transmit_tcn(br); br_transmit_tcn(br);
br_timer_set(&br->tcn_timer, jiffies); mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time);
} }
printk("\n"); printk("\n");
...@@ -299,7 +321,7 @@ void br_config_bpdu_generation(struct net_bridge *br) ...@@ -299,7 +321,7 @@ void br_config_bpdu_generation(struct net_bridge *br)
} }
/* called under bridge lock */ /* called under bridge lock */
static void br_reply(struct net_bridge_port *p) static inline void br_reply(struct net_bridge_port *p)
{ {
br_transmit_config(p); br_transmit_config(p);
} }
...@@ -323,6 +345,7 @@ void br_become_designated_port(struct net_bridge_port *p) ...@@ -323,6 +345,7 @@ void br_become_designated_port(struct net_bridge_port *p)
p->designated_port = p->port_id; p->designated_port = p->port_id;
} }
/* called under bridge lock */ /* called under bridge lock */
static void br_make_blocking(struct net_bridge_port *p) static void br_make_blocking(struct net_bridge_port *p)
{ {
...@@ -332,11 +355,9 @@ static void br_make_blocking(struct net_bridge_port *p) ...@@ -332,11 +355,9 @@ static void br_make_blocking(struct net_bridge_port *p)
p->state == BR_STATE_LEARNING) p->state == BR_STATE_LEARNING)
br_topology_change_detection(p->br); br_topology_change_detection(p->br);
printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
p->br->dev.name, p->port_no, p->dev->name, "blocking");
p->state = BR_STATE_BLOCKING; p->state = BR_STATE_BLOCKING;
br_timer_clear(&p->forward_delay_timer); br_log_state(p);
del_timer(&p->forward_delay_timer);
} }
} }
...@@ -345,20 +366,12 @@ static void br_make_forwarding(struct net_bridge_port *p) ...@@ -345,20 +366,12 @@ static void br_make_forwarding(struct net_bridge_port *p)
{ {
if (p->state == BR_STATE_BLOCKING) { if (p->state == BR_STATE_BLOCKING) {
if (p->br->stp_enabled) { if (p->br->stp_enabled) {
printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
p->br->dev.name, p->port_no, p->dev->name,
"listening");
p->state = BR_STATE_LISTENING; p->state = BR_STATE_LISTENING;
} else { } else {
printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
p->br->dev.name, p->port_no, p->dev->name,
"learning");
p->state = BR_STATE_LEARNING; p->state = BR_STATE_LEARNING;
} }
br_timer_set(&p->forward_delay_timer, jiffies); br_log_state(p);
} mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay); }
} }
/* called under bridge lock */ /* called under bridge lock */
...@@ -373,7 +386,7 @@ void br_port_state_selection(struct net_bridge *br) ...@@ -373,7 +386,7 @@ void br_port_state_selection(struct net_bridge *br)
p->topology_change_ack = 0; p->topology_change_ack = 0;
br_make_forwarding(p); br_make_forwarding(p);
} else if (br_is_designated_port(p)) { } else if (br_is_designated_port(p)) {
br_timer_clear(&p->message_age_timer); del_timer(&p->message_age_timer);
br_make_forwarding(p); br_make_forwarding(p);
} else { } else {
p->config_pending = 0; p->config_pending = 0;
...@@ -381,11 +394,12 @@ void br_port_state_selection(struct net_bridge *br) ...@@ -381,11 +394,12 @@ void br_port_state_selection(struct net_bridge *br)
br_make_blocking(p); br_make_blocking(p);
} }
} }
} }
} }
/* called under bridge lock */ /* called under bridge lock */
static void br_topology_change_acknowledge(struct net_bridge_port *p) static inline void br_topology_change_acknowledge(struct net_bridge_port *p)
{ {
p->topology_change_ack = 1; p->topology_change_ack = 1;
br_transmit_config(p); br_transmit_config(p);
...@@ -399,17 +413,20 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b ...@@ -399,17 +413,20 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b
br = p->br; br = p->br;
was_root = br_is_root_bridge(br); was_root = br_is_root_bridge(br);
if (br_supersedes_port_info(p, bpdu)) { if (br_supersedes_port_info(p, bpdu)) {
br_record_config_information(p, bpdu); br_record_config_information(p, bpdu);
br_configuration_update(br); br_configuration_update(br);
br_port_state_selection(br); br_port_state_selection(br);
if (!br_is_root_bridge(br) && was_root) { if (!br_is_root_bridge(br) && was_root) {
br_timer_clear(&br->hello_timer); del_timer(&br->hello_timer);
if (br->topology_change_detected) { if (br->topology_change_detected) {
br_timer_clear(&br->topology_change_timer); del_timer(&br->topology_change_timer);
br_transmit_tcn(br); br_transmit_tcn(br);
br_timer_set(&br->tcn_timer, jiffies);
mod_timer(&br->tcn_timer,
jiffies + br->bridge_hello_time);
} }
} }
...@@ -428,7 +445,7 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b ...@@ -428,7 +445,7 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b
void br_received_tcn_bpdu(struct net_bridge_port *p) void br_received_tcn_bpdu(struct net_bridge_port *p)
{ {
if (br_is_designated_port(p)) { if (br_is_designated_port(p)) {
printk(KERN_INFO "%s: received tcn bpdu on port %i(%s)\n", pr_info("%s: received tcn bpdu on port %i(%s)\n",
p->br->dev.name, p->port_no, p->dev->name); p->br->dev.name, p->port_no, p->dev->name);
br_topology_change_detection(p->br); br_topology_change_detection(p->br);
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include "br_private.h" #include "br_private.h"
#include "br_private_stp.h" #include "br_private_stp.h"
__u16 br_make_port_id(struct net_bridge_port *p) static inline __u16 br_make_port_id(const struct net_bridge_port *p)
{ {
return (p->priority << 8) | p->port_no; return (p->priority << 8) | p->port_no;
} }
...@@ -33,33 +33,25 @@ void br_init_port(struct net_bridge_port *p) ...@@ -33,33 +33,25 @@ void br_init_port(struct net_bridge_port *p)
p->state = BR_STATE_BLOCKING; p->state = BR_STATE_BLOCKING;
p->topology_change_ack = 0; p->topology_change_ack = 0;
p->config_pending = 0; p->config_pending = 0;
br_timer_clear(&p->message_age_timer);
br_timer_clear(&p->forward_delay_timer); br_stp_port_timer_init(p);
br_timer_clear(&p->hold_timer);
} }
/* called under bridge lock */ /* called under bridge lock */
void br_stp_enable_bridge(struct net_bridge *br) void br_stp_enable_bridge(struct net_bridge *br)
{ {
struct net_bridge_port *p; struct net_bridge_port *p;
struct timer_list *timer = &br->tick;
spin_lock_bh(&br->lock); spin_lock_bh(&br->lock);
init_timer(timer); br->hello_timer.expires = jiffies + br->hello_time;
timer->data = (unsigned long) br; add_timer(&br->hello_timer);
timer->function = br_tick;
timer->expires = jiffies + 1;
add_timer(timer);
br_timer_set(&br->hello_timer, jiffies);
br_config_bpdu_generation(br); br_config_bpdu_generation(br);
list_for_each_entry(p, &br->port_list, list) { list_for_each_entry(p, &br->port_list, list) {
if (p->dev->flags & IFF_UP) if (p->dev->flags & IFF_UP)
br_stp_enable_port(p); br_stp_enable_port(p);
}
br_timer_set(&br->gc_timer, jiffies); }
spin_unlock_bh(&br->lock); spin_unlock_bh(&br->lock);
} }
...@@ -68,22 +60,22 @@ void br_stp_disable_bridge(struct net_bridge *br) ...@@ -68,22 +60,22 @@ void br_stp_disable_bridge(struct net_bridge *br)
{ {
struct net_bridge_port *p; struct net_bridge_port *p;
spin_lock_bh(&br->lock); spin_lock(&br->lock);
br->topology_change = 0;
br->topology_change_detected = 0;
br_timer_clear(&br->hello_timer);
br_timer_clear(&br->topology_change_timer);
br_timer_clear(&br->tcn_timer);
br_timer_clear(&br->gc_timer);
br_fdb_cleanup(br);
list_for_each_entry(p, &br->port_list, list) { list_for_each_entry(p, &br->port_list, list) {
if (p->state != BR_STATE_DISABLED) if (p->state != BR_STATE_DISABLED)
br_stp_disable_port(p); br_stp_disable_port(p);
} }
spin_unlock_bh(&br->lock);
del_timer_sync(&br->tick); br->topology_change = 0;
br->topology_change_detected = 0;
spin_unlock(&br->lock);
del_timer_sync(&br->hello_timer);
del_timer_sync(&br->topology_change_timer);
del_timer_sync(&br->tcn_timer);
del_timer_sync(&br->gc_timer);
} }
/* called under bridge lock */ /* called under bridge lock */
...@@ -108,10 +100,13 @@ void br_stp_disable_port(struct net_bridge_port *p) ...@@ -108,10 +100,13 @@ void br_stp_disable_port(struct net_bridge_port *p)
p->state = BR_STATE_DISABLED; p->state = BR_STATE_DISABLED;
p->topology_change_ack = 0; p->topology_change_ack = 0;
p->config_pending = 0; p->config_pending = 0;
br_timer_clear(&p->message_age_timer);
br_timer_clear(&p->forward_delay_timer); del_timer(&p->message_age_timer);
br_timer_clear(&p->hold_timer); del_timer(&p->forward_delay_timer);
del_timer(&p->hold_timer);
br_configuration_update(br); br_configuration_update(br);
br_port_state_selection(br); br_port_state_selection(br);
if (br_is_root_bridge(br) && !wasroot) if (br_is_root_bridge(br) && !wasroot)
......
...@@ -20,15 +20,8 @@ ...@@ -20,15 +20,8 @@
#include "br_private.h" #include "br_private.h"
#include "br_private_stp.h" #include "br_private_stp.h"
/*
 * dump_bridge_id - print a bridge identifier.
 * @id: bridge id to print
 *
 * Output is the two priority bytes as four hex digits, a dot, then the
 * six address bytes as colon-separated hex pairs.  No log level and no
 * trailing newline are emitted: this prints a fragment of a line that
 * the caller starts and finishes.
 */
static void dump_bridge_id(bridge_id *id)
{
	printk("%.2x%.2x.%.2x:%.2x:%.2x:%.2x:%.2x:%.2x",
	       id->prio[0], id->prio[1],
	       id->addr[0], id->addr[1], id->addr[2],
	       id->addr[3], id->addr[4], id->addr[5]);
}
/* called under bridge lock */ /* called under bridge lock */
static int br_is_designated_for_some_port(struct net_bridge *br) static int br_is_designated_for_some_port(const struct net_bridge *br)
{ {
struct net_bridge_port *p; struct net_bridge_port *p;
...@@ -41,30 +34,45 @@ static int br_is_designated_for_some_port(struct net_bridge *br) ...@@ -41,30 +34,45 @@ static int br_is_designated_for_some_port(struct net_bridge *br)
return 0; return 0;
} }
/* called under bridge lock */ static void br_hello_timer_expired(unsigned long arg)
static void br_hello_timer_expired(struct net_bridge *br)
{ {
struct net_bridge *br = (struct net_bridge *)arg;
pr_debug("%s: hello timer expired\n", br->dev.name);
spin_lock_bh(&br->lock);
if (br->dev.flags & IFF_UP) {
br_config_bpdu_generation(br); br_config_bpdu_generation(br);
br_timer_set(&br->hello_timer, jiffies);
br->hello_timer.expires = jiffies + br->hello_time;
add_timer(&br->hello_timer);
}
spin_unlock_bh(&br->lock);
} }
/* called under bridge lock */ static void br_message_age_timer_expired(unsigned long arg)
static void br_message_age_timer_expired(struct net_bridge_port *p)
{ {
struct net_bridge *br; struct net_bridge_port *p = (struct net_bridge_port *) arg;
struct net_bridge *br = p->br;
const bridge_id *id = &p->designated_bridge;
int was_root; int was_root;
br = p->br; if (p->state == BR_STATE_DISABLED)
printk(KERN_INFO "%s: ", br->dev.name); return;
printk("neighbour ");
dump_bridge_id(&p->designated_bridge);
printk(" lost on port %i(%s)\n", p->port_no, p->dev->name); pr_info("%s: neighbor %.2x%.2x.%.2x:%.2x:%.2x:%.2x:%.2x:%.2x lost on port %d(%s)\n",
br->dev.name,
id->prio[0], id->prio[1],
id->addr[0], id->addr[1], id->addr[2],
id->addr[3], id->addr[4], id->addr[5],
p->port_no, p->dev->name);
/* /*
* According to the spec, the message age timer cannot be * According to the spec, the message age timer cannot be
* running when we are the root bridge. So.. this was_root * running when we are the root bridge. So.. this was_root
* check is redundant. I'm leaving it in for now, though. * check is redundant. I'm leaving it in for now, though.
*/ */
spin_lock_bh(&br->lock);
was_root = br_is_root_bridge(br); was_root = br_is_root_bridge(br);
br_become_designated_port(p); br_become_designated_port(p);
...@@ -72,107 +80,101 @@ static void br_message_age_timer_expired(struct net_bridge_port *p) ...@@ -72,107 +80,101 @@ static void br_message_age_timer_expired(struct net_bridge_port *p)
br_port_state_selection(br); br_port_state_selection(br);
if (br_is_root_bridge(br) && !was_root) if (br_is_root_bridge(br) && !was_root)
br_become_root_bridge(br); br_become_root_bridge(br);
spin_unlock_bh(&br->lock);
} }
/* called under bridge lock */ static void br_forward_delay_timer_expired(unsigned long arg)
static void br_forward_delay_timer_expired(struct net_bridge_port *p)
{ {
if (p->state == BR_STATE_LISTENING) { struct net_bridge_port *p = (struct net_bridge_port *) arg;
printk(KERN_INFO "%s: port %i(%s) entering %s state\n", struct net_bridge *br = p->br;
p->br->dev.name, p->port_no, p->dev->name, "learning");
pr_debug("%s: %d(%s) forward delay timer\n",
br->dev.name, p->port_no, p->dev->name);
spin_lock_bh(&br->lock);
if (p->state == BR_STATE_LISTENING) {
p->state = BR_STATE_LEARNING; p->state = BR_STATE_LEARNING;
br_timer_set(&p->forward_delay_timer, jiffies); p->forward_delay_timer.expires = jiffies + br->forward_delay;
add_timer(&p->forward_delay_timer);
} else if (p->state == BR_STATE_LEARNING) { } else if (p->state == BR_STATE_LEARNING) {
printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
p->br->dev.name, p->port_no, p->dev->name, "forwarding");
p->state = BR_STATE_FORWARDING; p->state = BR_STATE_FORWARDING;
if (br_is_designated_for_some_port(p->br)) if (br_is_designated_for_some_port(br))
br_topology_change_detection(p->br); br_topology_change_detection(br);
} }
br_log_state(p);
spin_unlock_bh(&br->lock);
} }
/* called under bridge lock */ static void br_tcn_timer_expired(unsigned long arg)
static void br_tcn_timer_expired(struct net_bridge *br)
{ {
printk(KERN_INFO "%s: retransmitting tcn bpdu\n", br->dev.name); struct net_bridge *br = (struct net_bridge *) arg;
pr_debug("%s: tcn timer expired\n", br->dev.name);
spin_lock_bh(&br->lock);
if (br->dev.flags & IFF_UP) {
br_transmit_tcn(br); br_transmit_tcn(br);
br_timer_set(&br->tcn_timer, jiffies);
br->tcn_timer.expires = jiffies + br->bridge_hello_time;
add_timer(&br->tcn_timer);
}
spin_unlock_bh(&br->lock);
} }
/* called under bridge lock */ static void br_topology_change_timer_expired(unsigned long arg)
static void br_topology_change_timer_expired(struct net_bridge *br)
{ {
struct net_bridge *br = (struct net_bridge *) arg;
pr_debug("%s: topo change timer expired\n", br->dev.name);
spin_lock_bh(&br->lock);
br->topology_change_detected = 0; br->topology_change_detected = 0;
br->topology_change = 0; br->topology_change = 0;
spin_unlock_bh(&br->lock);
} }
/* called under bridge lock */ static void br_hold_timer_expired(unsigned long arg)
static void br_hold_timer_expired(struct net_bridge_port *p)
{ {
struct net_bridge_port *p = (struct net_bridge_port *) arg;
pr_debug("%s: %d(%s) hold timer expired\n",
p->br->dev.name, p->port_no, p->dev->name);
spin_lock_bh(&p->br->lock);
if (p->config_pending) if (p->config_pending)
br_transmit_config(p); br_transmit_config(p);
spin_unlock_bh(&p->br->lock);
} }
/* called under bridge lock */ static inline void br_timer_init(struct timer_list *timer,
static void br_check_port_timers(struct net_bridge_port *p) void (*_function)(unsigned long),
unsigned long _data)
{ {
if (br_timer_has_expired(&p->message_age_timer, p->br->max_age)) { init_timer(timer);
br_timer_clear(&p->message_age_timer); timer->function = _function;
br_message_age_timer_expired(p); timer->data = _data;
}
if (br_timer_has_expired(&p->forward_delay_timer, p->br->forward_delay)) {
br_timer_clear(&p->forward_delay_timer);
br_forward_delay_timer_expired(p);
}
if (br_timer_has_expired(&p->hold_timer, BR_HOLD_TIME)) {
br_timer_clear(&p->hold_timer);
br_hold_timer_expired(p);
}
} }
/* called under bridge lock */ void br_stp_timer_init(struct net_bridge *br)
static void br_check_timers(struct net_bridge *br)
{ {
struct net_bridge_port *p; br_timer_init(&br->hello_timer, br_hello_timer_expired,
(unsigned long) br);
if (br_timer_has_expired(&br->gc_timer, br->gc_interval)) { br_timer_init(&br->tcn_timer, br_tcn_timer_expired,
br_timer_set(&br->gc_timer, jiffies); (unsigned long) br);
br_fdb_cleanup(br);
}
if (br_timer_has_expired(&br->hello_timer, br->hello_time)) {
br_timer_clear(&br->hello_timer);
br_hello_timer_expired(br);
}
if (br_timer_has_expired(&br->tcn_timer, br->bridge_hello_time)) { br_timer_init(&br->topology_change_timer,
br_timer_clear(&br->tcn_timer); br_topology_change_timer_expired,
br_tcn_timer_expired(br); (unsigned long) br);
}
if (br_timer_has_expired(&br->topology_change_timer, br->bridge_forward_delay + br->bridge_max_age)) {
br_timer_clear(&br->topology_change_timer);
br_topology_change_timer_expired(br);
}
list_for_each_entry(p, &br->port_list, list) { br_timer_init(&br->gc_timer, br_fdb_cleanup, (unsigned long) br);
if (p->state != BR_STATE_DISABLED)
br_check_port_timers(p);
}
} }
void br_tick(unsigned long __data) void br_stp_port_timer_init(struct net_bridge_port *p)
{ {
struct net_bridge *br = (struct net_bridge *)__data; br_timer_init(&p->message_age_timer, br_message_age_timer_expired,
(unsigned long) p);
if (spin_trylock_bh(&br->lock)) { br_timer_init(&p->forward_delay_timer, br_forward_delay_timer_expired,
br_check_timers(br); (unsigned long) p);
spin_unlock_bh(&br->lock);
} br_timer_init(&p->hold_timer, br_hold_timer_expired,
br->tick.expires = jiffies + 1; (unsigned long) p);
add_timer(&br->tick);
} }
...@@ -90,7 +90,6 @@ ...@@ -90,7 +90,6 @@
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/brlock.h>
#include <net/sock.h> #include <net/sock.h>
#include <linux/rtnetlink.h> #include <linux/rtnetlink.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
...@@ -170,8 +169,9 @@ const char *if_port_text[] = { ...@@ -170,8 +169,9 @@ const char *if_port_text[] = {
* 86DD IPv6 * 86DD IPv6
*/ */
static struct packet_type *ptype_base[16]; /* 16 way hashed list */ static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED;
static struct packet_type *ptype_all; /* Taps */ static struct list_head ptype_base[16]; /* 16 way hashed list */
static struct list_head ptype_all; /* Taps */
#ifdef OFFLINE_SAMPLE #ifdef OFFLINE_SAMPLE
static void sample_queue(unsigned long dummy); static void sample_queue(unsigned long dummy);
...@@ -239,14 +239,17 @@ int netdev_nit; ...@@ -239,14 +239,17 @@ int netdev_nit;
* Add a protocol handler to the networking stack. The passed &packet_type * Add a protocol handler to the networking stack. The passed &packet_type
* is linked into kernel lists and may not be freed until it has been * is linked into kernel lists and may not be freed until it has been
* removed from the kernel lists. * removed from the kernel lists.
*
* This call does not sleep; therefore it cannot
* guarantee that all CPUs that are in the middle of receiving packets
* will see the new packet type (until the next received packet).
*/ */
void dev_add_pack(struct packet_type *pt) void dev_add_pack(struct packet_type *pt)
{ {
int hash; int hash;
br_write_lock_bh(BR_NETPROTO_LOCK); spin_lock_bh(&ptype_lock);
#ifdef CONFIG_NET_FASTROUTE #ifdef CONFIG_NET_FASTROUTE
/* Hack to detect packet socket */ /* Hack to detect packet socket */
if (pt->data && (long)(pt->data) != 1) { if (pt->data && (long)(pt->data) != 1) {
...@@ -256,52 +259,76 @@ void dev_add_pack(struct packet_type *pt) ...@@ -256,52 +259,76 @@ void dev_add_pack(struct packet_type *pt)
#endif #endif
if (pt->type == htons(ETH_P_ALL)) { if (pt->type == htons(ETH_P_ALL)) {
netdev_nit++; netdev_nit++;
pt->next = ptype_all; list_add_rcu(&pt->list, &ptype_all);
ptype_all = pt;
} else { } else {
hash = ntohs(pt->type) & 15; hash = ntohs(pt->type) & 15;
pt->next = ptype_base[hash]; list_add_rcu(&pt->list, &ptype_base[hash]);
ptype_base[hash] = pt;
} }
br_write_unlock_bh(BR_NETPROTO_LOCK); spin_unlock_bh(&ptype_lock);
} }
extern void linkwatch_run_queue(void); extern void linkwatch_run_queue(void);
/** /**
* dev_remove_pack - remove packet handler * __dev_remove_pack - remove packet handler
* @pt: packet type declaration * @pt: packet type declaration
* *
* Remove a protocol handler that was previously added to the kernel * Remove a protocol handler that was previously added to the kernel
* protocol handlers by dev_add_pack(). The passed &packet_type is removed * protocol handlers by dev_add_pack(). The passed &packet_type is removed
* from the kernel lists and can be freed or reused once this function * from the kernel lists and can be freed or reused once this function
* returns. * returns.
*
* The packet type might still be in use by receivers
* and must not be freed until after all CPUs have gone
* through a quiescent state.
*/ */
void dev_remove_pack(struct packet_type *pt) void __dev_remove_pack(struct packet_type *pt)
{ {
struct packet_type **pt1; struct list_head *head;
struct packet_type *pt1;
br_write_lock_bh(BR_NETPROTO_LOCK); spin_lock_bh(&ptype_lock);
if (pt->type == htons(ETH_P_ALL)) { if (pt->type == htons(ETH_P_ALL)) {
netdev_nit--; netdev_nit--;
pt1 = &ptype_all; head = &ptype_all;
} else } else
pt1 = &ptype_base[ntohs(pt->type) & 15]; head = &ptype_base[ntohs(pt->type) & 15];
for (; *pt1; pt1 = &((*pt1)->next)) { list_for_each_entry(pt1, head, list) {
if (pt == *pt1) { if (pt == pt1) {
*pt1 = pt->next;
#ifdef CONFIG_NET_FASTROUTE #ifdef CONFIG_NET_FASTROUTE
if (pt->data) if (pt->data)
netdev_fastroute_obstacles--; netdev_fastroute_obstacles--;
#endif #endif
list_del_rcu(&pt->list);
goto out; goto out;
} }
} }
printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out: out:
br_write_unlock_bh(BR_NETPROTO_LOCK); spin_unlock_bh(&ptype_lock);
}
/**
* dev_remove_pack - remove packet handler
* @pt: packet type declaration
*
* Remove a protocol handler that was previously added to the kernel
* protocol handlers by dev_add_pack(). The passed &packet_type is removed
* from the kernel lists and can be freed or reused once this function
* returns.
*
* This call sleeps to guarantee that no CPU is looking at the packet
* type after return. It is __dev_remove_pack() followed by
* synchronize_net(); callers that cannot sleep must use
* __dev_remove_pack() and defer freeing the packet type themselves.
*/
void dev_remove_pack(struct packet_type *pt)
{
/* Unlink @pt from its handler list (done under ptype_lock). */
__dev_remove_pack(pt);
/*
 * Wait for receive-side readers that may still hold a reference to
 * @pt; synchronize_net() may sleep.
 */
synchronize_net();
}
/****************************************************************************** /******************************************************************************
...@@ -943,8 +970,8 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) ...@@ -943,8 +970,8 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
struct packet_type *ptype; struct packet_type *ptype;
do_gettimeofday(&skb->stamp); do_gettimeofday(&skb->stamp);
br_read_lock(BR_NETPROTO_LOCK); rcu_read_lock();
for (ptype = ptype_all; ptype; ptype = ptype->next) { list_for_each_entry_rcu(ptype, &ptype_all, list) {
/* Never send packets back to the socket /* Never send packets back to the socket
* they originated from - MvS (miquels@drinkel.ow.org) * they originated from - MvS (miquels@drinkel.ow.org)
*/ */
...@@ -974,7 +1001,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) ...@@ -974,7 +1001,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
ptype->func(skb2, skb->dev, ptype); ptype->func(skb2, skb->dev, ptype);
} }
} }
br_read_unlock(BR_NETPROTO_LOCK); rcu_read_unlock();
} }
/* Calculate csum in the case, when packet is misrouted. /* Calculate csum in the case, when packet is misrouted.
...@@ -1488,7 +1515,8 @@ int netif_receive_skb(struct sk_buff *skb) ...@@ -1488,7 +1515,8 @@ int netif_receive_skb(struct sk_buff *skb)
skb->h.raw = skb->nh.raw = skb->data; skb->h.raw = skb->nh.raw = skb->data;
pt_prev = NULL; pt_prev = NULL;
for (ptype = ptype_all; ptype; ptype = ptype->next) { rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) { if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev) { if (pt_prev) {
if (!pt_prev->data) { if (!pt_prev->data) {
...@@ -1511,17 +1539,15 @@ int netif_receive_skb(struct sk_buff *skb) ...@@ -1511,17 +1539,15 @@ int netif_receive_skb(struct sk_buff *skb)
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
if (skb->dev->br_port) { if (skb->dev->br_port) {
int ret;
ret = handle_bridge(skb, pt_prev); ret = handle_bridge(skb, pt_prev);
if (br_handle_frame_hook(skb) == 0) if (br_handle_frame_hook(skb) == 0)
return ret; goto out;
pt_prev = NULL; pt_prev = NULL;
} }
#endif #endif
for (ptype = ptype_base[ntohs(type) & 15]; ptype; ptype = ptype->next) { list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type && if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) { (!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev) { if (pt_prev) {
...@@ -1552,6 +1578,8 @@ int netif_receive_skb(struct sk_buff *skb) ...@@ -1552,6 +1578,8 @@ int netif_receive_skb(struct sk_buff *skb)
ret = NET_RX_DROP; ret = NET_RX_DROP;
} }
out:
rcu_read_unlock();
return ret; return ret;
} }
...@@ -1625,7 +1653,8 @@ static void net_rx_action(struct softirq_action *h) ...@@ -1625,7 +1653,8 @@ static void net_rx_action(struct softirq_action *h)
unsigned long start_time = jiffies; unsigned long start_time = jiffies;
int budget = netdev_max_backlog; int budget = netdev_max_backlog;
br_read_lock(BR_NETPROTO_LOCK);
preempt_disable();
local_irq_disable(); local_irq_disable();
while (!list_empty(&queue->poll_list)) { while (!list_empty(&queue->poll_list)) {
...@@ -1654,7 +1683,7 @@ static void net_rx_action(struct softirq_action *h) ...@@ -1654,7 +1683,7 @@ static void net_rx_action(struct softirq_action *h)
} }
out: out:
local_irq_enable(); local_irq_enable();
br_read_unlock(BR_NETPROTO_LOCK); preempt_enable();
return; return;
softnet_break: softnet_break:
...@@ -1997,9 +2026,9 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) ...@@ -1997,9 +2026,9 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
dev_hold(master); dev_hold(master);
} }
br_write_lock_bh(BR_NETPROTO_LOCK);
slave->master = master; slave->master = master;
br_write_unlock_bh(BR_NETPROTO_LOCK);
synchronize_net();
if (old) if (old)
dev_put(old); dev_put(old);
...@@ -2663,8 +2692,8 @@ int netdev_finish_unregister(struct net_device *dev) ...@@ -2663,8 +2692,8 @@ int netdev_finish_unregister(struct net_device *dev)
/* Synchronize with packet receive processing. */ /* Synchronize with packet receive processing. */
void synchronize_net(void) void synchronize_net(void)
{ {
br_write_lock_bh(BR_NETPROTO_LOCK); might_sleep();
br_write_unlock_bh(BR_NETPROTO_LOCK); synchronize_kernel();
} }
/** /**
...@@ -2848,6 +2877,10 @@ static int __init net_dev_init(void) ...@@ -2848,6 +2877,10 @@ static int __init net_dev_init(void)
subsystem_register(&net_subsys); subsystem_register(&net_subsys);
INIT_LIST_HEAD(&ptype_all);
for (i = 0; i < 16; i++)
INIT_LIST_HEAD(&ptype_base[i]);
#ifdef CONFIG_NET_DIVERT #ifdef CONFIG_NET_DIVERT
dv_init(); dv_init();
#endif /* CONFIG_NET_DIVERT */ #endif /* CONFIG_NET_DIVERT */
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
/***************************** INCLUDES *****************************/ /***************************** INCLUDES *****************************/
#include <linux/config.h> /* Not needed ??? */ #include <linux/config.h> /* Not needed ??? */
#include <linux/module.h>
#include <linux/types.h> /* off_t */ #include <linux/types.h> /* off_t */
#include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */ #include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
......
...@@ -1013,7 +1013,7 @@ void inet_register_protosw(struct inet_protosw *p) ...@@ -1013,7 +1013,7 @@ void inet_register_protosw(struct inet_protosw *p)
out: out:
spin_unlock_bh(&inetsw_lock); spin_unlock_bh(&inetsw_lock);
synchronize_kernel(); synchronize_net();
return; return;
...@@ -1040,7 +1040,7 @@ void inet_unregister_protosw(struct inet_protosw *p) ...@@ -1040,7 +1040,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
list_del_rcu(&p->list); list_del_rcu(&p->list);
spin_unlock_bh(&inetsw_lock); spin_unlock_bh(&inetsw_lock);
synchronize_kernel(); synchronize_net();
} }
} }
......
...@@ -685,16 +685,6 @@ skb_can_coalesce(struct sk_buff *skb, int i, struct page *page, int off) ...@@ -685,16 +685,6 @@ skb_can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
return 0; return 0;
} }
/*
 * Record a page fragment in slot i of skb's shared info and set the
 * fragment count to i + 1 (the slot just written becomes the last one).
 */
static void
skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
{
	skb_frag_t *slot = skb_shinfo(skb)->frags + i;

	slot->size = size;
	slot->page_offset = off;
	slot->page = page;

	skb_shinfo(skb)->nr_frags = i + 1;
}
static inline unsigned int static inline unsigned int
csum_page(struct page *page, int offset, int copy) csum_page(struct page *page, int offset, int copy)
{ {
......
...@@ -269,6 +269,67 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) ...@@ -269,6 +269,67 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
xfrm_state_put(x); xfrm_state_put(x);
} }
/*
 * Allocate and initialise the IPIP tunnel state that accompanies the
 * IPComp state x.
 *
 * The new state copies x's selector and addresses, uses x's source
 * address as its SPI, and is marked valid.  Its tunnel_users count starts
 * at 1: we always hold one tunnel user reference to indicate a tunnel.
 *
 * Returns the new state, or NULL if allocation, type lookup or the type's
 * init_state() hook fails.
 */
static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
{
	struct xfrm_state *tunnel = xfrm_state_alloc();

	if (!tunnel)
		return NULL;

	/* Identity of the tunnel state. */
	tunnel->id.proto = IPPROTO_IPIP;
	tunnel->id.spi = x->props.saddr.a4;
	tunnel->id.daddr.a4 = x->id.daddr.a4;
	memcpy(&tunnel->sel, &x->sel, sizeof(tunnel->sel));

	/* Properties. */
	tunnel->props.family = AF_INET;
	tunnel->props.mode = 1;
	tunnel->props.saddr.a4 = x->props.saddr.a4;

	/* Bind the IPIP type and let it initialise the state. */
	tunnel->type = xfrm_get_type(IPPROTO_IPIP, tunnel->props.family);
	if (!tunnel->type || tunnel->type->init_state(tunnel, NULL)) {
		xfrm_state_put(tunnel);
		return NULL;
	}

	tunnel->km.state = XFRM_STATE_VALID;
	atomic_set(&tunnel->tunnel_users, 1);
	return tunnel;
}
/*
* Must be protected by xfrm_cfg_sem. State and tunnel user references are
* always incremented on success.
*/
static int ipcomp_tunnel_attach(struct xfrm_state *x)
{
int err = 0;
struct xfrm_state *t;
t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4,
x->props.saddr.a4, IPPROTO_IPIP, AF_INET);
if (!t) {
t = ipcomp_tunnel_create(x);
if (!t) {
err = -EINVAL;
goto out;
}
xfrm_state_insert(t);
xfrm_state_hold(t);
}
x->tunnel = t;
atomic_inc(&t->tunnel_users);
out:
return err;
}
static void ipcomp_free_data(struct ipcomp_data *ipcd) static void ipcomp_free_data(struct ipcomp_data *ipcd)
{ {
if (ipcd->tfm) if (ipcd->tfm)
...@@ -308,6 +369,12 @@ static int ipcomp_init_state(struct xfrm_state *x, void *args) ...@@ -308,6 +369,12 @@ static int ipcomp_init_state(struct xfrm_state *x, void *args)
if (!ipcd->tfm) if (!ipcd->tfm)
goto error; goto error;
if (x->props.mode) {
err = ipcomp_tunnel_attach(x);
if (err)
goto error;
}
calg_desc = xfrm_calg_get_byname(x->calg->alg_name); calg_desc = xfrm_calg_get_byname(x->calg->alg_name);
BUG_ON(!calg_desc); BUG_ON(!calg_desc);
ipcd->threshold = calg_desc->uinfo.comp.threshold; ipcd->threshold = calg_desc->uinfo.comp.threshold;
......
...@@ -1188,12 +1188,9 @@ ip_ct_gather_frags(struct sk_buff *skb) ...@@ -1188,12 +1188,9 @@ ip_ct_gather_frags(struct sk_buff *skb)
local_bh_enable(); local_bh_enable();
if (!skb) { if (!skb) {
if (sk) sock_put(sk); if (sk)
sock_put(sk);
return skb; return skb;
} else if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
kfree_skb(skb);
if (sk) sock_put(sk);
return NULL;
} }
if (sk) { if (sk) {
......
...@@ -15,34 +15,10 @@ struct notifier_block; ...@@ -15,34 +15,10 @@ struct notifier_block;
#include <linux/netfilter_ipv4/compat_firewall.h> #include <linux/netfilter_ipv4/compat_firewall.h>
#include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include "ip_fw_compat.h"
static struct firewall_ops *fwops; static struct firewall_ops *fwops;
/* From ip_fw_compat_redir.c */
extern unsigned int
do_redirect(struct sk_buff *skb,
const struct net_device *dev,
u_int16_t redirpt);
extern void
check_for_redirect(struct sk_buff *skb);
extern void
check_for_unredirect(struct sk_buff *skb);
/* From ip_fw_compat_masq.c */
extern unsigned int
do_masquerade(struct sk_buff **pskb, const struct net_device *dev);
extern unsigned int
check_for_masq_error(struct sk_buff **pskb);
extern unsigned int
check_for_demasq(struct sk_buff **pskb);
extern int __init masq_init(void);
extern void masq_cleanup(void);
/* They call these; we do what they want. */ /* They call these; we do what they want. */
int register_firewall(int pf, struct firewall_ops *fw) int register_firewall(int pf, struct firewall_ops *fw)
{ {
......
#ifndef _LINUX_IP_FW_COMPAT_H
#define _LINUX_IP_FW_COMPAT_H
/*
 * Prototypes for the helpers implemented in ip_fw_compat_redir.c and
 * ip_fw_compat_masq.c, gathered in one header so that every file that
 * includes it sees the same declarations.
 */
/* From ip_fw_compat_redir.c */
extern unsigned int
do_redirect(struct sk_buff *skb,
const struct net_device *dev,
u_int16_t redirpt);
extern void
check_for_redirect(struct sk_buff *skb);
extern void
check_for_unredirect(struct sk_buff *skb);
/* From ip_fw_compat_masq.c */
extern unsigned int
do_masquerade(struct sk_buff **pskb, const struct net_device *dev);
/*
 * NOTE(review): declared void here, whereas the file-local declaration
 * this header replaces used "unsigned int" -- confirm against the
 * definition in ip_fw_compat_masq.c.
 */
extern void check_for_masq_error(struct sk_buff **pskb);
extern unsigned int
check_for_demasq(struct sk_buff **pskb);
/* Set-up and tear-down entry points of the masquerading code. */
extern int __init masq_init(void);
extern void masq_cleanup(void);
#endif /* _LINUX_IP_FW_COMPAT_H */
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <linux/netfilter_ipv4/ip_nat.h> #include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_core.h> #include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/listhelp.h> #include <linux/netfilter_ipv4/listhelp.h>
#include "ip_fw_compat.h"
#if 0 #if 0
#define DEBUGP printk #define DEBUGP printk
......
...@@ -28,6 +28,7 @@ static DECLARE_LOCK(redir_lock); ...@@ -28,6 +28,7 @@ static DECLARE_LOCK(redir_lock);
#define ASSERT_WRITE_LOCK(x) MUST_BE_LOCKED(&redir_lock) #define ASSERT_WRITE_LOCK(x) MUST_BE_LOCKED(&redir_lock)
#include <linux/netfilter_ipv4/listhelp.h> #include <linux/netfilter_ipv4/listhelp.h>
#include "ip_fw_compat.h"
#if 0 #if 0
#define DEBUGP printk #define DEBUGP printk
......
...@@ -163,36 +163,32 @@ static int ipip_rcv(struct sk_buff *skb) ...@@ -163,36 +163,32 @@ static int ipip_rcv(struct sk_buff *skb)
skb->nh.iph->saddr, skb->nh.iph->saddr,
IPPROTO_IPIP, AF_INET); IPPROTO_IPIP, AF_INET);
if (x) { if (!x)
goto drop;
spin_lock(&x->lock); spin_lock(&x->lock);
if (unlikely(x->km.state != XFRM_STATE_VALID)) if (unlikely(x->km.state != XFRM_STATE_VALID))
goto drop_unlock; goto drop_unlock;
}
err = ipip_xfrm_rcv(x, NULL, skb); err = ipip_xfrm_rcv(x, NULL, skb);
if (err) if (err)
goto drop_unlock; goto drop_unlock;
if (x) {
x->curlft.bytes += skb->len; x->curlft.bytes += skb->len;
x->curlft.packets++; x->curlft.packets++;
spin_unlock(&x->lock); spin_unlock(&x->lock);
xfrm_state_put(x); xfrm_state_put(x);
} out:
return err;
return 0;
drop_unlock: drop_unlock:
if (x) {
spin_unlock(&x->lock); spin_unlock(&x->lock);
xfrm_state_put(x); xfrm_state_put(x);
} drop:
err = NET_RX_DROP;
kfree_skb(skb); kfree_skb(skb);
out: goto out;
return 0;
} }
static void ipip_err(struct sk_buff *skb, u32 info) static void ipip_err(struct sk_buff *skb, u32 info)
......
...@@ -637,7 +637,7 @@ inet6_unregister_protosw(struct inet_protosw *p) ...@@ -637,7 +637,7 @@ inet6_unregister_protosw(struct inet_protosw *p)
list_del_rcu(&p->list); list_del_rcu(&p->list);
spin_unlock_bh(&inetsw6_lock); spin_unlock_bh(&inetsw6_lock);
synchronize_kernel(); synchronize_net();
} }
} }
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
* YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit. * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
* Randy Dunlap and * Randy Dunlap and
* YOSHIFUJI Hideaki @USAGI: Per-interface statistics support * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
* Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
*/ */
#include <linux/module.h> #include <linux/module.h>
...@@ -104,42 +105,6 @@ static __inline__ void icmpv6_xmit_unlock(void) ...@@ -104,42 +105,6 @@ static __inline__ void icmpv6_xmit_unlock(void)
spin_unlock_bh(&icmpv6_socket->sk->lock.slock); spin_unlock_bh(&icmpv6_socket->sk->lock.slock);
} }
/*
 * getfrag callback
 *
 * Copies one piece of the ICMPv6 message described by data (a struct
 * icmpv6_msg) into buff while accumulating the checksum in msg->csum.
 * The piece at offset 0 also carries the ICMPv6 header, whose checksum
 * field is filled in from the accumulated value.  Always returns 0.
 */
static int icmpv6_getfrag(const void *data, struct in6_addr *saddr,
			   char *buff, unsigned int offset, unsigned int len)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) data;
	struct icmp6hdr *hdr_out;
	__u32 sum;

	if (!offset) {
		/* First piece: ICMPv6 header followed by payload. */
		sum = csum_partial_copy_nocheck((void *) &msg->icmph, buff,
						sizeof(struct icmp6hdr),
						msg->csum);
		sum = skb_copy_and_csum_bits(msg->skb, msg->offset,
					     buff + sizeof(struct icmp6hdr),
					     len - sizeof(struct icmp6hdr),
					     sum);

		hdr_out = (struct icmp6hdr *) buff;
		hdr_out->icmp6_cksum = csum_ipv6_magic(saddr, msg->daddr,
						       msg->len,
						       IPPROTO_ICMPV6, sum);
		return 0;
	}

	/* Later piece: payload only; offset still counts the ICMPv6 header. */
	sum = skb_copy_and_csum_bits(msg->skb, msg->offset +
				     (offset - sizeof(struct icmp6hdr)),
				     buff, len, msg->csum);
	msg->csum = sum;
	return 0;
}
/* /*
* Slightly more convenient version of icmpv6_send. * Slightly more convenient version of icmpv6_send.
*/ */
...@@ -242,22 +207,74 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) ...@@ -242,22 +207,74 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
return (optval&0xC0) == 0x80; return (optval&0xC0) == 0x80;
} }
/*
 * Finish and send the ICMPv6 message queued on sk's write queue.
 *
 * @sk:   socket whose write queue holds the pending message
 * @fl:   flow; fl6_src, fl6_dst and proto feed the pseudo-header checksum
 * @thdr: ICMPv6 header to copy into the first queued buffer
 * @len:  length passed to csum_ipv6_magic() for the pseudo-header
 *
 * The header is written at skb->h.raw of the first queued buffer, the
 * checksum is computed over the header plus the per-buffer partial sums
 * (skb->csum), and the queue is then handed to ip6_push_pending_frames().
 *
 * Returns 0 when nothing is queued, otherwise the result of
 * ip6_push_pending_frames() (previously this result was dropped and 0 was
 * always returned).
 */
int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;
	int err = 0;

	if ((skb = skb_peek(&sk->write_queue)) == NULL)
		goto out;

	icmp6h = (struct icmp6hdr*) skb->h.raw;
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	/* The checksum field must be zero while the sum is computed. */
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->write_queue) == 1) {
		/* Single buffer: fold the header into that buffer's sum. */
		skb->csum = csum_partial((char *)icmp6h,
					 sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(fl->fl6_src,
						      fl->fl6_dst,
						      len, fl->proto, skb->csum);
	} else {
		/* Several buffers: combine every partial sum first. */
		u32 tmp_csum = 0;

		skb_queue_walk(&sk->write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial((char *)icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		tmp_csum = csum_ipv6_magic(fl->fl6_src,
					   fl->fl6_dst,
					   len, fl->proto, tmp_csum);
		icmp6h->icmp6_cksum = tmp_csum;
	}
	/* A computed value of zero is sent as all ones. */
	if (icmp6h->icmp6_cksum == 0)
		icmp6h->icmp6_cksum = -1;

	/* Report transmit failures instead of silently returning 0. */
	err = ip6_push_pending_frames(sk);
out:
	return err;
}
/*
 * getfrag callback for ip6_append_data(): copy len bytes starting at
 * offset of the source skb (passed as from) into to, and fold the
 * checksum of the copied bytes into skb->csum at position odd.
 * Always returns 0.
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct sk_buff *src = (struct sk_buff *)from;
	__u32 piece_csum;

	piece_csum = skb_copy_and_csum_bits(src, offset, to, len, 0);
	skb->csum = csum_block_add(skb->csum, piece_csum, odd);

	return 0;
}
/* /*
* Send an ICMP message in response to a packet in error * Send an ICMP message in response to a packet in error
*/ */
void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
struct net_device *dev) struct net_device *dev)
{ {
struct inet6_dev *idev; struct inet6_dev *idev;
struct ipv6hdr *hdr = skb->nh.ipv6h; struct ipv6hdr *hdr = skb->nh.ipv6h;
struct sock *sk = icmpv6_socket->sk; struct sock *sk = icmpv6_socket->sk;
struct in6_addr *saddr = NULL; struct ipv6_pinfo *np = inet6_sk(sk);
int iif = 0; struct in6_addr *saddr = NULL, *tmp_saddr = NULL;
struct icmpv6_msg msg; struct dst_entry *dst;
struct icmp6hdr tmp_hdr;
struct flowi fl; struct flowi fl;
int iif = 0;
int addr_type = 0; int addr_type = 0;
int len; int len, plen;
int hlimit = -1;
int err = 0;
if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail) if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
return; return;
...@@ -328,36 +345,48 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, ...@@ -328,36 +345,48 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
if (!icmpv6_xrlim_allow(sk, type, &fl)) if (!icmpv6_xrlim_allow(sk, type, &fl))
goto out; goto out;
/* tmp_hdr.icmp6_type = type;
* ok. kick it. checksum will be provided by the tmp_hdr.icmp6_code = code;
* getfrag_t callback. tmp_hdr.icmp6_cksum = 0;
*/ tmp_hdr.icmp6_pointer = htonl(info);
msg.icmph.icmp6_type = type; if (!fl.oif && ipv6_addr_is_multicast(fl.fl6_dst))
msg.icmph.icmp6_code = code; fl.oif = np->mcast_oif;
msg.icmph.icmp6_cksum = 0;
msg.icmph.icmp6_pointer = htonl(info);
msg.skb = skb; err = ip6_dst_lookup(sk, &dst, &fl, &tmp_saddr);
msg.offset = skb->nh.raw - skb->data; if (err) goto out;
msg.csum = 0;
msg.daddr = &hdr->saddr;
len = skb->len - msg.offset + sizeof(struct icmp6hdr); if (hlimit < 0) {
len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr)); if (ipv6_addr_is_multicast(fl.fl6_dst))
hlimit = np->mcast_hops;
else
hlimit = np->hop_limit;
if (hlimit < 0)
hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
}
plen = skb->nh.raw - skb->data;
__skb_pull(skb, plen);
len = skb->len;
len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
if (len < 0) { if (len < 0) {
if (net_ratelimit()) if (net_ratelimit())
printk(KERN_DEBUG "icmp: len problem\n"); printk(KERN_DEBUG "icmp: len problem\n");
__skb_push(skb, plen);
goto out; goto out;
} }
msg.len = len;
idev = in6_dev_get(skb->dev); idev = in6_dev_get(skb->dev);
ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1, err = ip6_append_data(sk, icmpv6_getfrag, skb, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr),
MSG_DONTWAIT); hlimit, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT);
if (err) {
ip6_flush_pending_frames(sk);
goto out;
}
err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
__skb_push(skb, plen);
if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB) if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
ICMP6_INC_STATS_OFFSET_BH(idev, Icmp6OutDestUnreachs, type - ICMPV6_DEST_UNREACH); ICMP6_INC_STATS_OFFSET_BH(idev, Icmp6OutDestUnreachs, type - ICMPV6_DEST_UNREACH);
ICMP6_INC_STATS_BH(idev, Icmp6OutMsgs); ICMP6_INC_STATS_BH(idev, Icmp6OutMsgs);
...@@ -365,6 +394,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, ...@@ -365,6 +394,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
if (likely(idev != NULL)) if (likely(idev != NULL))
in6_dev_put(idev); in6_dev_put(idev);
out: out:
if (tmp_saddr) kfree(tmp_saddr);
icmpv6_xmit_unlock(); icmpv6_xmit_unlock();
} }
...@@ -372,10 +402,14 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ...@@ -372,10 +402,14 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
{ {
struct sock *sk = icmpv6_socket->sk; struct sock *sk = icmpv6_socket->sk;
struct inet6_dev *idev; struct inet6_dev *idev;
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *saddr = NULL, *tmp_saddr = NULL;
struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw; struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
struct in6_addr *saddr; struct icmp6hdr tmp_hdr;
struct icmpv6_msg msg;
struct flowi fl; struct flowi fl;
struct dst_entry *dst;
int err = 0;
int hlimit = -1;
saddr = &skb->nh.ipv6h->daddr; saddr = &skb->nh.ipv6h->daddr;
...@@ -383,39 +417,55 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ...@@ -383,39 +417,55 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
ipv6_chk_acast_addr(0, saddr)) ipv6_chk_acast_addr(0, saddr))
saddr = NULL; saddr = NULL;
msg.icmph.icmp6_type = ICMPV6_ECHO_REPLY; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
msg.icmph.icmp6_code = 0; tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
msg.icmph.icmp6_cksum = 0;
msg.icmph.icmp6_identifier = icmph->icmp6_identifier;
msg.icmph.icmp6_sequence = icmph->icmp6_sequence;
msg.skb = skb;
msg.offset = 0;
msg.csum = 0;
msg.len = skb->len + sizeof(struct icmp6hdr);
msg.daddr = &skb->nh.ipv6h->saddr;
fl.proto = IPPROTO_ICMPV6; fl.proto = IPPROTO_ICMPV6;
fl.fl6_dst = msg.daddr; fl.fl6_dst = &skb->nh.ipv6h->saddr;
fl.fl6_src = saddr; fl.fl6_src = saddr;
fl.oif = skb->dev->ifindex; fl.oif = skb->dev->ifindex;
fl.fl6_flowlabel = 0; fl.fl6_flowlabel = 0;
fl.fl_icmp_type = ICMPV6_ECHO_REPLY; fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
fl.fl_icmp_code = 0; fl.fl_icmp_code = 0;
icmpv6_xmit_lock();
if (!fl.oif && ipv6_addr_is_multicast(fl.nl_u.ip6_u.daddr))
fl.oif = np->mcast_oif;
err = ip6_dst_lookup(sk, &dst, &fl, &tmp_saddr);
if (err) goto out;
if (hlimit < 0) {
if (ipv6_addr_is_multicast(fl.fl6_dst))
hlimit = np->mcast_hops;
else
hlimit = np->hop_limit;
if (hlimit < 0)
hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
}
idev = in6_dev_get(skb->dev); idev = in6_dev_get(skb->dev);
icmpv6_xmit_lock(); err = ip6_append_data(sk, icmpv6_getfrag, skb, skb->len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), hlimit, NULL, &fl,
(struct rt6_info*)dst, MSG_DONTWAIT);
if (err) {
ip6_flush_pending_frames(sk);
goto out;
}
err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, msg.len, NULL, -1,
MSG_DONTWAIT);
ICMP6_INC_STATS_BH(idev, Icmp6OutEchoReplies); ICMP6_INC_STATS_BH(idev, Icmp6OutEchoReplies);
ICMP6_INC_STATS_BH(idev, Icmp6OutMsgs); ICMP6_INC_STATS_BH(idev, Icmp6OutMsgs);
icmpv6_xmit_unlock();
if (likely(idev != NULL)) if (likely(idev != NULL))
in6_dev_put(idev); in6_dev_put(idev);
out:
if (tmp_saddr) kfree(tmp_saddr);
icmpv6_xmit_unlock();
} }
static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info) static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
......
...@@ -23,6 +23,9 @@ ...@@ -23,6 +23,9 @@
* *
* H. von Brand : Added missing #include <linux/string.h> * H. von Brand : Added missing #include <linux/string.h>
* Imran Patel : frag id should be in NBO * Imran Patel : frag id should be in NBO
* Kazunori MIYAZAWA @USAGI
* : add ip6_append_data and related functions
* for datagram xmit
*/ */
#include <linux/config.h> #include <linux/config.h>
...@@ -52,6 +55,8 @@ ...@@ -52,6 +55,8 @@
#include <net/icmp.h> #include <net/icmp.h>
#include <net/xfrm.h> #include <net/xfrm.h>
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr) static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{ {
static u32 ipv6_fragmentation_id = 1; static u32 ipv6_fragmentation_id = 1;
...@@ -98,7 +103,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) ...@@ -98,7 +103,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
} }
int ip6_output(struct sk_buff *skb) int ip6_output2(struct sk_buff *skb)
{ {
struct dst_entry *dst = skb->dst; struct dst_entry *dst = skb->dst;
struct net_device *dev = dst->dev; struct net_device *dev = dst->dev;
...@@ -134,6 +139,13 @@ int ip6_output(struct sk_buff *skb) ...@@ -134,6 +139,13 @@ int ip6_output(struct sk_buff *skb)
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
} }
/*
 * Output entry point: packets that exceed the device MTU, or that carry a
 * frag_list, go through ip6_fragment(); everything else is sent directly
 * with ip6_output2().
 */
int ip6_output(struct sk_buff *skb)
{
	int needs_frag = skb->len > skb->dst->dev->mtu ||
			 skb_shinfo(skb)->frag_list;

	if (!needs_frag)
		return ip6_output2(skb);

	return ip6_fragment(skb, ip6_output2);
}
#ifdef CONFIG_NETFILTER #ifdef CONFIG_NETFILTER
int ip6_route_me_harder(struct sk_buff *skb) int ip6_route_me_harder(struct sk_buff *skb)
...@@ -847,3 +859,658 @@ int ip6_forward(struct sk_buff *skb) ...@@ -847,3 +859,658 @@ int ip6_forward(struct sk_buff *skb)
kfree_skb(skb); kfree_skb(skb);
return -EINVAL; return -EINVAL;
} }
/*
 * Copy the per-packet metadata of "from" onto the fragment "to":
 * device, route, classification fields and, depending on configuration,
 * the traffic-control and netfilter state.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	/* Device and route (the route goes through dst_clone()). */
	to->dev = from->dev;
	to->dst = dst_clone(from->dst);

	/* Classification. */
	to->protocol = from->protocol;
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->security = from->security;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif

#ifdef CONFIG_NETFILTER
	to->nfmark = from->nfmark;

	/* Connection association is same as pre-frag packet */
	to->nfct = from->nfct;
	nf_conntrack_get(to->nfct);

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
	to->nf_bridge = from->nf_bridge;
	nf_bridge_get(to->nf_bridge);
#endif

#ifdef CONFIG_NETFILTER_DEBUG
	to->nf_debug = from->nf_debug;
#endif
#endif
}
/*
 * Walk the extension headers that follow the IPv6 header and find the
 * point where a fragment header would be inserted.
 *
 * Hop-by-hop, routing and destination-option headers are skipped; the
 * walk stops at the first other header type, at a destination-option
 * header seen after a routing header, or when the offset reaches the end
 * of the linear data.
 *
 * On return *nexthdr points at the "next header" byte of the last header
 * that was kept.  The return value is the length in bytes of the part in
 * front of the insertion point.
 */
static int ip6_found_nexthdr(struct sk_buff *skb, u8 **nexthdr)
{
	struct ipv6_opt_hdr *ext = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
	unsigned int packet_len = skb->tail - skb->nh.raw;
	u16 offset = sizeof(struct ipv6hdr);
	int seen_rthdr = 0;

	*nexthdr = &skb->nh.ipv6h->nexthdr;

	while (offset + 1 <= packet_len) {
		u8 type = **nexthdr;

		if (type != NEXTHDR_HOP &&
		    type != NEXTHDR_ROUTING &&
		    type != NEXTHDR_DEST)
			break;

		if (type == NEXTHDR_ROUTING)
			seen_rthdr = 1;
		else if (type == NEXTHDR_DEST && seen_rthdr)
			break;

		/* Step over this extension header. */
		offset += ipv6_optlen(ext);
		*nexthdr = &ext->nexthdr;
		ext = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
	}

	return offset;
}
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
{
struct net_device *dev;
struct rt6_info *rt = (struct rt6_info*)skb->dst;
struct sk_buff *frag;
struct ipv6hdr *tmp_hdr;
struct frag_hdr *fh;
unsigned int mtu, hlen, left, len;
u32 frag_id = 0;
int ptr, offset = 0, err=0;
u8 *prevhdr, nexthdr = 0;
dev = rt->u.dst.dev;
hlen = ip6_found_nexthdr(skb, &prevhdr);
nexthdr = *prevhdr;
mtu = dst_pmtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
if (skb_shinfo(skb)->frag_list) {
int first_len = 0;
if (first_len - hlen > mtu ||
((first_len - hlen) & 7) ||
skb_cloned(skb))
goto slow_path;
for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
/* Correct geometry. */
if (frag->len > mtu ||
((frag->len & 7) && frag->next) ||
skb_headroom(frag) < hlen)
goto slow_path;
/* Correct socket ownership. */
if (frag->sk == NULL)
goto slow_path;
/* Partially cloned skb? */
if (skb_shared(frag))
goto slow_path;
}
err = 0;
offset = 0;
frag = skb_shinfo(skb)->frag_list;
skb_shinfo(skb)->frag_list = 0;
/* BUILD HEADER */
tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
if (!tmp_hdr) {
IP6_INC_STATS(Ip6FragFails);
return -ENOMEM;
}
*prevhdr = NEXTHDR_FRAGMENT;
memcpy(tmp_hdr, skb->nh.raw, hlen);
__skb_pull(skb, hlen);
fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
skb->nh.raw = __skb_push(skb, hlen);
memcpy(skb->nh.raw, tmp_hdr, hlen);
ipv6_select_ident(skb, fh);
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(0x0001);
frag_id = fh->identification;
first_len = skb_pagelen(skb);
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
if (frag) {
frag->h.raw = frag->data;
fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
frag->nh.raw = __skb_push(frag, hlen);
memcpy(frag->nh.raw, tmp_hdr, hlen);
offset += skb->len - hlen - sizeof(struct frag_hdr);
fh->nexthdr = nexthdr;
fh->reserved = 0;
if (frag->next != NULL)
offset |= 0x0001;
fh->frag_off = htons(offset);
fh->identification = frag_id;
frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
ip6_copy_metadata(frag, skb);
}
err = output(skb);
if (err || !frag)
break;
skb = frag;
frag = skb->next;
skb->next = NULL;
}
if (tmp_hdr)
kfree(tmp_hdr);
if (err == 0) {
IP6_INC_STATS(Ip6FragOKs);
return 0;
}
while (frag) {
skb = frag->next;
kfree_skb(frag);
frag = skb;
}
IP6_INC_STATS(Ip6FragFails);
return err;
}
slow_path:
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
/*
* Fragment the datagram.
*/
*prevhdr = NEXTHDR_FRAGMENT;
/*
* Keep copying data until we run out.
*/
while(left > 0) {
len = left;
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > mtu)
len = mtu;
/* IF: we are not sending upto and including the packet end
then align the next start on an eight byte boundary */
if (len < left) {
len &= ~7;
}
/*
* Allocate buffer.
*/
if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
err = -ENOMEM;
goto fail;
}
/*
* Set up data on packet
*/
ip6_copy_metadata(frag, skb);
skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
skb_put(frag, len + hlen + sizeof(struct frag_hdr));
frag->nh.raw = frag->data;
fh = (struct frag_hdr*)(frag->data + hlen);
frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
/*
* Charge the memory for the fragment to any owner
* it might possess
*/
if (skb->sk)
skb_set_owner_w(frag, skb->sk);
/*
* Copy the packet header into the new buffer.
*/
memcpy(frag->nh.raw, skb->data, hlen);
/*
* Build fragment header.
*/
fh->nexthdr = nexthdr;
fh->reserved = 0;
if (frag_id) {
ipv6_select_ident(skb, fh);
frag_id = fh->identification;
} else
fh->identification = frag_id;
/*
* Copy a block of the IP datagram.
*/
if (skb_copy_bits(skb, ptr, frag->h.raw, len))
BUG();
left -= len;
fh->frag_off = htons( left > 0 ? (offset | 0x0001) : offset);
frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
ptr += len;
offset += len;
/*
* Put this fragment into the sending queue.
*/
IP6_INC_STATS(Ip6FragCreates);
err = output(frag);
if (err)
goto fail;
}
kfree_skb(skb);
IP6_INC_STATS(Ip6FragOKs);
return err;
fail:
kfree_skb(skb);
IP6_INC_STATS(Ip6FragFails);
return err;
}
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl, struct in6_addr **saddr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
int err = 0;
*dst = __sk_dst_check(sk, np->dst_cookie);
if (*dst) {
struct rt6_info *rt = (struct rt6_info*)*dst;
/* Yes, checking route validity in not connected
case is not very simple. Take into account,
that we do not support routing by source, TOS,
and MSG_DONTROUTE --ANK (980726)
1. If route was host route, check that
cached destination is current.
If it is network route, we still may
check its validity using saved pointer
to the last used address: daddr_cache.
We do not want to save whole address now,
(because main consumer of this service
is tcp, which has not this problem),
so that the last trick works only on connected
sockets.
2. oif also should be the same.
*/
if (((rt->rt6i_dst.plen != 128 ||
ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr))
&& (np->daddr_cache == NULL ||
ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache)))
|| (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
*dst = NULL;
} else
dst_hold(*dst);
}
if (*dst == NULL)
*dst = ip6_route_output(sk, fl);
if ((*dst)->error) {
IP6_INC_STATS(Ip6OutNoRoutes);
dst_release(*dst);
return -ENETUNREACH;
}
if (fl->fl6_src == NULL) {
*saddr = kmalloc(sizeof(struct in6_addr), GFP_ATOMIC);
err = ipv6_get_saddr(*dst, fl->fl6_dst, *saddr);
if (err) {
#if IP6_DEBUG >= 2
printk(KERN_DEBUG "ip6_build_xmit: "
"no availiable source address\n");
#endif
return err;
}
fl->fl6_src = *saddr;
}
if (*dst) {
if ((err = xfrm_lookup(dst, fl, sk, 0)) < 0) {
dst_release(*dst);
return -ENETUNREACH;
}
}
return 0;
}
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
unsigned int flags)
{
struct inet_opt *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff *skb;
unsigned int maxfraglen, fragheaderlen;
int exthdrlen;
int hh_len;
int mtu;
int copy = 0;
int err;
int offset = 0;
int csummode = CHECKSUM_NONE;
if (flags&MSG_PROBE)
return 0;
if (skb_queue_empty(&sk->write_queue)) {
/*
* setup for corking
*/
if (opt) {
if (np->cork.opt == NULL)
np->cork.opt = kmalloc(opt->tot_len, sk->allocation);
memcpy(np->cork.opt, opt, opt->tot_len);
inet->cork.flags |= IPCORK_OPT;
/* need source address above miyazawa*/
exthdrlen += opt->opt_flen ? opt->opt_flen : 0;
}
dst_hold(&rt->u.dst);
np->cork.rt = rt;
np->cork.fl = fl;
inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
inet->cork.length = 0;
inet->sndmsg_page = NULL;
inet->sndmsg_off = 0;
if ((exthdrlen = rt->u.dst.header_len) != 0) {
length += exthdrlen;
transhdrlen += exthdrlen;
}
} else {
rt = np->cork.rt;
if (inet->cork.flags & IPCORK_OPT)
opt = np->cork.opt;
transhdrlen = 0;
exthdrlen = 0;
mtu = inet->cork.fragsize;
}
hh_len = (rt->u.dst.dev->hard_header_len&~15) + 16;
fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
if (mtu < 65576) {
if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
return -EMSGSIZE;
}
}
inet->cork.length += length;
if ((skb = skb_peek_tail(&sk->write_queue)) == NULL)
goto alloc_new_skb;
while (length > 0) {
if ((copy = maxfraglen - skb->len) <= 0) {
char *data;
unsigned int datalen;
unsigned int fraglen;
unsigned int alloclen;
BUG_TRAP(copy == 0);
alloc_new_skb:
datalen = maxfraglen - fragheaderlen;
if (datalen > length)
datalen = length;
fraglen = datalen + fragheaderlen;
if ((flags & MSG_MORE) &&
!(rt->u.dst.dev->features&NETIF_F_SG))
alloclen = maxfraglen;
else
alloclen = fraglen;
alloclen += sizeof(struct frag_hdr);
if (transhdrlen) {
skb = sock_alloc_send_skb(sk,
alloclen + hh_len + 15,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (atomic_read(&sk->wmem_alloc) <= 2*sk->sndbuf)
skb = sock_wmalloc(sk,
alloclen + hh_len + 15, 1,
sk->allocation);
if (unlikely(skb == NULL))
err = -ENOBUFS;
}
if (skb == NULL)
goto error;
/*
* Fill in the control structures
*/
skb->ip_summed = csummode;
skb->csum = 0;
/* reserve 8 byte for fragmentation */
skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
/*
* Find where to start putting bytes
*/
data = skb_put(skb, fraglen);
skb->nh.raw = data + exthdrlen;
data += fragheaderlen;
skb->h.raw = data + exthdrlen;
copy = datalen - transhdrlen;
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
}
offset += copy;
length -= datalen;
transhdrlen = 0;
exthdrlen = 0;
csummode = CHECKSUM_NONE;
/*
* Put the packet on the pending queue
*/
__skb_queue_tail(&sk->write_queue, skb);
continue;
}
if (copy > length)
copy = length;
if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
unsigned int off;
off = skb->len;
if (getfrag(from, skb_put(skb, copy),
offset, copy, off, skb) < 0) {
__skb_trim(skb, off);
err = -EFAULT;
goto error;
}
} else {
int i = skb_shinfo(skb)->nr_frags;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
struct page *page = inet->sndmsg_page;
int off = inet->sndmsg_off;
unsigned int left;
if (page && (left = PAGE_SIZE - off) > 0) {
if (copy >= left)
copy = left;
if (page != frag->page) {
if (i == MAX_SKB_FRAGS) {
err = -EMSGSIZE;
goto error;
}
get_page(page);
skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
} else if(i < MAX_SKB_FRAGS) {
if (copy > PAGE_SIZE)
copy = PAGE_SIZE;
page = alloc_pages(sk->allocation, 0);
if (page == NULL) {
err = -ENOMEM;
goto error;
}
inet->sndmsg_page = page;
inet->sndmsg_off = 0;
skb_fill_page_desc(skb, i, page, 0, 0);
frag = &skb_shinfo(skb)->frags[i];
skb->truesize += PAGE_SIZE;
atomic_add(PAGE_SIZE, &sk->wmem_alloc);
} else {
err = -EMSGSIZE;
goto error;
}
if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
err = -EFAULT;
goto error;
}
inet->sndmsg_off += copy;
frag->size += copy;
skb->len += copy;
skb->data_len += copy;
}
offset += copy;
length -= copy;
}
return 0;
error:
inet->cork.length -= length;
IP6_INC_STATS(Ip6OutDiscards);
return err;
}
/*
 * ip6_push_pending_frames - transmit everything queued on sk->write_queue
 * as one IPv6 packet.
 *
 * The first queued skb becomes the head of the packet; every further skb
 * is chained onto the head's frag_list.  Extension headers recorded in
 * the cork options are pushed in front of the payload, the IPv6 header is
 * built, and the result is handed to the NF_IP6_LOCAL_OUT hook.
 *
 * Whatever happens, the cork state (options, route and flow pointers) is
 * released before returning.
 *
 * Returns 0 when the queue was empty or the packet was accepted, a
 * negative errno on failure.  A positive code from the output path is
 * mapped through net_xmit_errno() only if the socket set np->recverr,
 * otherwise it is reported as success.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr *final_dst = NULL;
	struct inet_opt *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = np->cork.rt;
	struct flowi *fl = np->cork.fl;
	unsigned char proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->write_queue)) == NULL)
		goto out;

	/*
	 * Only touch the flow once we know there is something to send:
	 * ip6_flush_pending_frames() leaves cork.fl NULL, so reading it
	 * ahead of the empty-queue check could dereference NULL.
	 */
	proto = fl->proto;

	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb->nh.raw)
		__skb_pull(skb, skb->nh.raw - skb->data);

	/* Chain the remaining skbs onto the head, stripping their header room. */
	while ((tmp_skb = __skb_dequeue(&sk->write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
#endif
	}

	final_dst = fl->fl6_dst;
	__skb_pull(skb, skb->h.raw - skb->nh.raw);
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));

	/* Version 6 in the top nibble, combined with the flow label. */
	*(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);

	/*
	 * skb->len now includes the IPv6 header itself, while payload_len
	 * does not.  The 16-bit field can therefore describe packets of up
	 * to sizeof(struct ipv6hdr) + 0xFFFF bytes; only beyond that is it
	 * left at 0 (the jumbogram convention, RFC 2675).  Comparing
	 * skb->len against 65536 directly would zero the field for payloads
	 * that still fit.
	 */
	if (skb->len <= sizeof(struct ipv6hdr) + 0xFFFF)
		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	else
		hdr->payload_len = 0;
	hdr->hop_limit = np->hop_limit;
	hdr->nexthdr = proto;
	memcpy(&hdr->saddr, fl->fl6_src, sizeof(struct in6_addr));
	memcpy(&hdr->daddr, final_dst, sizeof(struct in6_addr));

	skb->dst = dst_clone(&rt->u.dst);
	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);

	/*
	 * Positive codes are only turned into an errno when the socket
	 * asked for error reporting.  The IPv6 flag (np->recverr) is the one
	 * the IPv6 sendmsg paths test for the same purpose.
	 */
	if (err > 0)
		err = np->recverr ? net_xmit_errno(err) : 0;

out:
	/* Success, failure and empty queue all drop the cork state. */
	inet->cork.flags &= ~IPCORK_OPT;
	if (np->cork.opt) {
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}
	np->cork.rt = NULL;
	np->cork.fl = NULL;
	return err;
}
/*
 * ip6_flush_pending_frames - throw away everything queued for sending.
 *
 * Frees each skb on sk->write_queue (taken from the tail) and then drops
 * the cork state: the IPCORK_OPT flag is cleared, the corked options are
 * freed, and the corked route and flow pointers are reset.
 */
void ip6_flush_pending_frames(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_opt *inet = inet_sk(sk);
	struct sk_buff *pending;

	for (;;) {
		pending = __skb_dequeue_tail(&sk->write_queue);
		if (pending == NULL)
			break;
		kfree_skb(pending);
	}

	inet->cork.flags &= ~IPCORK_OPT;

	if (np->cork.opt != NULL) {
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	/* Storing NULL over a pointer that is already NULL changes nothing. */
	np->cork.rt = NULL;
	np->cork.fl = NULL;
}
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
* Fixes: * Fixes:
* Hideaki YOSHIFUJI : sin6_scope_id support * Hideaki YOSHIFUJI : sin6_scope_id support
* YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance) * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance)
* Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License * modify it under the terms of the GNU General Public License
...@@ -29,6 +30,8 @@ ...@@ -29,6 +30,8 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/if_arp.h> #include <linux/if_arp.h>
#include <linux/icmpv6.h> #include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/ioctls.h> #include <asm/ioctls.h>
...@@ -438,87 +441,115 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg ...@@ -438,87 +441,115 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
goto out_free; goto out_free;
} }
/* static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct raw6_opt *opt, int len)
* Sending... {
*/ struct sk_buff *skb;
int err = 0;
u16 *csum;
struct rawv6_fakehdr { if ((skb = skb_peek(&sk->write_queue)) == NULL)
struct iovec *iov; goto out;
struct sock *sk;
__u32 len;
__u32 cksum;
__u32 proto;
struct in6_addr *daddr;
};
static int rawv6_getfrag(const void *data, struct in6_addr *saddr, if (opt->offset + 1 < len)
char *buff, unsigned int offset, unsigned int len) csum = (u16 *)(skb->h.raw + opt->offset);
{ else {
struct iovec *iov = (struct iovec *) data; err = -EINVAL;
goto out;
}
if (skb_queue_len(&sk->write_queue) == 1) {
/*
* Only one fragment on the socket.
*/
/* should be check HW csum miyazawa */
*csum = csum_ipv6_magic(fl->fl6_src,
fl->fl6_dst,
len, fl->proto, skb->csum);
} else {
u32 tmp_csum = 0;
skb_queue_walk(&sk->write_queue, skb) {
tmp_csum = csum_add(tmp_csum, skb->csum);
}
return memcpy_fromiovecend(buff, iov, offset, len); tmp_csum = csum_ipv6_magic(fl->fl6_src,
fl->fl6_dst,
len, fl->proto, tmp_csum);
*csum = tmp_csum;
}
if (*csum == 0)
*csum = -1;
ip6_push_pending_frames(sk);
out:
return err;
} }
static int rawv6_frag_cksum(const void *data, struct in6_addr *addr, static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
char *buff, unsigned int offset, struct flowi *fl, struct rt6_info *rt,
unsigned int len) unsigned int flags)
{ {
struct rawv6_fakehdr *hdr = (struct rawv6_fakehdr *) data; struct inet_opt *inet = inet_sk(sk);
struct ipv6hdr *iph;
struct sk_buff *skb;
unsigned int hh_len;
int err;
if (csum_partial_copy_fromiovecend(buff, hdr->iov, offset, if (length > rt->u.dst.dev->mtu) {
len, &hdr->cksum)) ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
return -EFAULT; return -EMSGSIZE;
}
if (flags&MSG_PROBE)
goto out;
if (offset == 0) { hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
struct sock *sk;
struct raw6_opt *opt;
struct in6_addr *daddr;
sk = hdr->sk; skb = sock_alloc_send_skb(sk, length+hh_len+15,
opt = raw6_sk(sk); flags&MSG_DONTWAIT, &err);
if (skb == NULL)
goto error;
skb_reserve(skb, hh_len);
if (hdr->daddr) skb->priority = sk->priority;
daddr = hdr->daddr; skb->dst = dst_clone(&rt->u.dst);
else
daddr = addr + 1;
hdr->cksum = csum_ipv6_magic(addr, daddr, hdr->len, skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
hdr->proto, hdr->cksum);
if (opt->offset + 1 < len) { skb->ip_summed = CHECKSUM_NONE;
__u16 *csum;
csum = (__u16 *) (buff + opt->offset); skb->h.raw = skb->nh.raw;
if (*csum) { err = memcpy_fromiovecend((void *)iph, from, 0, length);
/* in case cksum was not initialized */ if (err)
__u32 sum = hdr->cksum; goto error_fault;
sum += *csum;
*csum = hdr->cksum = (sum + (sum>>16));
} else {
*csum = hdr->cksum;
}
} else {
if (net_ratelimit())
printk(KERN_DEBUG "icmp: cksum offset too big\n");
return -EINVAL;
}
}
return 0;
}
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
dst_output);
if (err > 0)
err = inet->recverr ? net_xmit_errno(err) : 0;
if (err)
goto error;
out:
return 0;
error_fault:
err = -EFAULT;
kfree_skb(skb);
error:
IP6_INC_STATS(Ip6OutDiscards);
return err;
}
static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int len) static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int len)
{ {
struct ipv6_txoptions opt_space; struct ipv6_txoptions opt_space;
struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
struct in6_addr *daddr, *saddr = NULL;
struct inet_opt *inet = inet_sk(sk); struct inet_opt *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk);
struct raw6_opt *raw_opt = raw6_sk(sk);
struct ipv6_txoptions *opt = NULL; struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL; struct ip6_flowlabel *flowlabel = NULL;
struct dst_entry *dst = NULL;
struct flowi fl; struct flowi fl;
int addr_len = msg->msg_namelen; int addr_len = msg->msg_namelen;
struct in6_addr *daddr;
struct raw6_opt *raw_opt;
int hlimit = -1; int hlimit = -1;
u16 proto; u16 proto;
int err; int err;
...@@ -552,6 +583,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg ...@@ -552,6 +583,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
if (!proto) if (!proto)
proto = inet->num; proto = inet->num;
else if (proto != inet->num)
return(-EINVAL);
if (proto > 255) if (proto > 255)
return(-EINVAL); return(-EINVAL);
...@@ -590,6 +623,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg ...@@ -590,6 +623,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
* unspecfied destination address * unspecfied destination address
* treated as error... is this correct ? * treated as error... is this correct ?
*/ */
fl6_sock_release(flowlabel);
return(-EINVAL); return(-EINVAL);
} }
...@@ -619,8 +653,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg ...@@ -619,8 +653,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
if (flowlabel) if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = fl6_merge_options(&opt_space, flowlabel, opt);
raw_opt = raw6_sk(sk);
fl.proto = proto; fl.proto = proto;
fl.fl6_dst = daddr; fl.fl6_dst = daddr;
if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr)) if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr))
...@@ -628,30 +660,64 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg ...@@ -628,30 +660,64 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
fl.fl_icmp_type = 0; fl.fl_icmp_type = 0;
fl.fl_icmp_code = 0; fl.fl_icmp_code = 0;
if (raw_opt->checksum) { /* merge ip6_build_xmit from ip6_output */
struct rawv6_fakehdr hdr; if (opt && opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
fl.fl6_dst = rt0->addr;
}
if (!fl.oif && ipv6_addr_is_multicast(fl.nl_u.ip6_u.daddr))
fl.oif = np->mcast_oif;
hdr.iov = msg->msg_iov; err = ip6_dst_lookup(sk, &dst, &fl, &saddr);
hdr.sk = sk; if (err) goto out;
hdr.len = len;
hdr.cksum = 0;
hdr.proto = proto;
if (opt && opt->srcrt) if (hlimit < 0) {
hdr.daddr = daddr; if (ipv6_addr_is_multicast(fl.fl6_dst))
hlimit = np->mcast_hops;
else else
hdr.daddr = NULL; hlimit = np->hop_limit;
if (hlimit < 0)
hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
}
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
err = ip6_build_xmit(sk, rawv6_frag_cksum, &hdr, &fl, len, back_from_confirm:
opt, hlimit, msg->msg_flags); if (inet->hdrincl) {
err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags);
} else { } else {
err = ip6_build_xmit(sk, rawv6_getfrag, msg->msg_iov, &fl, len, lock_sock(sk);
opt, hlimit, msg->msg_flags); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
hlimit, opt, &fl, (struct rt6_info*)dst, msg->msg_flags);
if (err)
ip6_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE)) {
if (raw_opt->checksum) {
err = rawv6_push_pending_frames(sk, &fl, raw_opt, len);
} else {
err = ip6_push_pending_frames(sk);
}
}
} }
done:
ip6_dst_store(sk, dst, fl.nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
if (err > 0)
err = np->recverr ? net_xmit_errno(err) : 0;
release_sock(sk);
out:
fl6_sock_release(flowlabel); fl6_sock_release(flowlabel);
if (saddr) kfree(saddr);
return err<0?err:len; return err<0?err:len;
do_confirm:
dst_confirm(dst);
if (!(msg->msg_flags & MSG_PROBE) || len)
goto back_from_confirm;
err = 0;
goto done;
} }
static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
* YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
* Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
* a single port at the same time. * a single port at the same time.
* Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License * modify it under the terms of the GNU General Public License
...@@ -738,96 +739,117 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) ...@@ -738,96 +739,117 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
kfree_skb(skb); kfree_skb(skb);
return(0); return(0);
} }
/* /*
* Sending * Throw away all pending data and cancel the corking. Socket is locked.
*/ */
static void udp_v6_flush_pending_frames(struct sock *sk)
struct udpv6fakehdr
{ {
struct udphdr uh; struct udp_opt *up = udp_sk(sk);
struct iovec *iov;
__u32 wcheck; if (up->pending) {
__u32 pl_len; up->pending = 0;
struct in6_addr *daddr; ip6_flush_pending_frames(sk);
}; }
}
/* /*
* with checksum * Sending
*/ */
static int udpv6_getfrag(const void *data, struct in6_addr *addr, static int udp_v6_push_pending_frames(struct sock *sk, struct udp_opt *up)
char *buff, unsigned int offset, unsigned int len)
{ {
struct udpv6fakehdr *udh = (struct udpv6fakehdr *) data; struct sk_buff *skb;
char *dst; struct udphdr *uh;
int final = 0; struct ipv6_pinfo *np = inet6_sk(sk);
int clen = len; struct flowi *fl = np->cork.fl;
int err = 0;
dst = buff;
if (offset) {
offset -= sizeof(struct udphdr);
} else {
dst += sizeof(struct udphdr);
final = 1;
clen -= sizeof(struct udphdr);
}
if (csum_partial_copy_fromiovecend(dst, udh->iov, offset, /* Grab the skbuff where UDP header space exists. */
clen, &udh->wcheck)) if ((skb = skb_peek(&sk->write_queue)) == NULL)
return -EFAULT; goto out;
if (final) { /*
struct in6_addr *daddr; * Create a UDP header
*/
uh = skb->h.uh;
uh->source = fl->fl_ip_sport;
uh->dest = fl->fl_ip_dport;
uh->len = htons(up->len);
uh->check = 0;
udh->wcheck = csum_partial((char *)udh, sizeof(struct udphdr), if (sk->no_check == UDP_CSUM_NOXMIT) {
udh->wcheck); skb->ip_summed = CHECKSUM_NONE;
goto send;
}
if (udh->daddr) { if (skb_queue_len(&sk->write_queue) == 1) {
daddr = udh->daddr; skb->csum = csum_partial((char *)uh,
sizeof(struct udphdr), skb->csum);
uh->check = csum_ipv6_magic(fl->fl6_src,
fl->fl6_dst,
up->len, fl->proto, skb->csum);
} else { } else {
/* u32 tmp_csum = 0;
* use packet destination address
* this should improve cache locality skb_queue_walk(&sk->write_queue, skb) {
*/ tmp_csum = csum_add(tmp_csum, skb->csum);
daddr = addr + 1;
} }
udh->uh.check = csum_ipv6_magic(addr, daddr, tmp_csum = csum_partial((char *)uh,
udh->pl_len, IPPROTO_UDP, sizeof(struct udphdr), tmp_csum);
udh->wcheck); tmp_csum = csum_ipv6_magic(fl->fl6_src,
if (udh->uh.check == 0) fl->fl6_dst,
udh->uh.check = -1; up->len, fl->proto, tmp_csum);
uh->check = tmp_csum;
memcpy(buff, udh, sizeof(struct udphdr));
} }
return 0; if (uh->check == 0)
uh->check = -1;
send:
err = ip6_push_pending_frames(sk);
out:
up->len = 0;
up->pending = 0;
return err;
} }
static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int ulen) static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int len)
{ {
struct ipv6_txoptions opt_space; struct ipv6_txoptions opt_space;
struct udpv6fakehdr udh; struct udp_opt *up = udp_sk(sk);
struct inet_opt *inet = inet_sk(sk); struct inet_opt *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk);
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
struct in6_addr *daddr, *saddr = NULL;
struct ipv6_txoptions *opt = NULL; struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL; struct ip6_flowlabel *flowlabel = NULL;
struct flowi fl; struct flowi fl;
struct dst_entry *dst;
int addr_len = msg->msg_namelen; int addr_len = msg->msg_namelen;
struct in6_addr *daddr; int ulen = len;
int len = ulen + sizeof(struct udphdr);
int addr_type; int addr_type;
int hlimit = -1; int hlimit = -1;
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
int err; int err;
/* Rough check on arithmetic overflow, /* Rough check on arithmetic overflow,
better check is made in ip6_build_xmit better check is made in ip6_build_xmit
*/ */
if (ulen < 0 || ulen > INT_MAX - sizeof(struct udphdr)) if (len < 0 || len > INT_MAX - sizeof(struct udphdr))
return -EMSGSIZE; return -EMSGSIZE;
if (up->pending) {
/*
* There are pending frames.
* The socket lock must be held while it's corked.
*/
lock_sock(sk);
if (likely(up->pending))
goto do_append_data;
release_sock(sk);
}
ulen += sizeof(struct udphdr);
fl.fl6_flowlabel = 0; fl.fl6_flowlabel = 0;
fl.oif = 0; fl.oif = 0;
...@@ -835,7 +857,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg ...@@ -835,7 +857,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
if (sin6->sin6_family == AF_INET) { if (sin6->sin6_family == AF_INET) {
if (__ipv6_only_sock(sk)) if (__ipv6_only_sock(sk))
return -ENETUNREACH; return -ENETUNREACH;
return udp_sendmsg(iocb, sk, msg, ulen); return udp_sendmsg(iocb, sk, msg, len);
} }
if (addr_len < SIN6_LEN_RFC2133) if (addr_len < SIN6_LEN_RFC2133)
...@@ -847,7 +869,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg ...@@ -847,7 +869,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
if (sin6->sin6_port == 0) if (sin6->sin6_port == 0)
return -EINVAL; return -EINVAL;
udh.uh.dest = sin6->sin6_port; up->dport = sin6->sin6_port;
daddr = &sin6->sin6_addr; daddr = &sin6->sin6_addr;
if (np->sndflow) { if (np->sndflow) {
...@@ -873,7 +895,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg ...@@ -873,7 +895,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
if (sk->state != TCP_ESTABLISHED) if (sk->state != TCP_ESTABLISHED)
return -ENOTCONN; return -ENOTCONN;
udh.uh.dest = inet->dport; up->dport = inet->dport;
daddr = &np->daddr; daddr = &np->daddr;
fl.fl6_flowlabel = np->flow_label; fl.fl6_flowlabel = np->flow_label;
} }
...@@ -888,15 +910,14 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg ...@@ -888,15 +910,14 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
sin.sin_family = AF_INET; sin.sin_family = AF_INET;
sin.sin_addr.s_addr = daddr->s6_addr32[3]; sin.sin_addr.s_addr = daddr->s6_addr32[3];
sin.sin_port = udh.uh.dest; sin.sin_port = up->dport;
msg->msg_name = (struct sockaddr *)(&sin); msg->msg_name = (struct sockaddr *)(&sin);
msg->msg_namelen = sizeof(sin); msg->msg_namelen = sizeof(sin);
fl6_sock_release(flowlabel); fl6_sock_release(flowlabel);
return udp_sendmsg(iocb, sk, msg, ulen); return udp_sendmsg(iocb, sk, msg, len);
} }
udh.daddr = NULL;
if (!fl.oif) if (!fl.oif)
fl.oif = sk->bound_dev_if; fl.oif = sk->bound_dev_if;
fl.fl6_src = NULL; fl.fl6_src = NULL;
...@@ -922,33 +943,172 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg ...@@ -922,33 +943,172 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg
opt = np->opt; opt = np->opt;
if (flowlabel) if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = fl6_merge_options(&opt_space, flowlabel, opt);
if (opt && opt->srcrt)
udh.daddr = daddr;
udh.uh.source = inet->sport;
udh.uh.len = len < 0x10000 ? htons(len) : 0;
udh.uh.check = 0;
udh.iov = msg->msg_iov;
udh.wcheck = 0;
udh.pl_len = len;
fl.proto = IPPROTO_UDP; fl.proto = IPPROTO_UDP;
fl.fl6_dst = daddr; fl.fl6_dst = daddr;
if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr)) if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr))
fl.fl6_src = &np->saddr; fl.fl6_src = &np->saddr;
fl.fl_ip_dport = udh.uh.dest; fl.fl_ip_dport = up->dport;
fl.fl_ip_sport = udh.uh.source; fl.fl_ip_sport = inet->sport;
/* merge ip6_build_xmit from ip6_output */
if (opt && opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
fl.fl6_dst = rt0->addr;
}
if (!fl.oif && ipv6_addr_is_multicast(fl.nl_u.ip6_u.daddr))
fl.oif = np->mcast_oif;
err = ip6_dst_lookup(sk, &dst, &fl, &saddr);
if (err) goto out;
if (hlimit < 0) {
if (ipv6_addr_is_multicast(fl.fl6_dst))
hlimit = np->mcast_hops;
else
hlimit = np->hop_limit;
if (hlimit < 0)
hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
}
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
back_from_confirm:
lock_sock(sk);
if (unlikely(up->pending)) {
/* The socket is already corked while preparing it. */
/* ... which is an evident application bug. --ANK */
release_sock(sk);
NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n"));
err = -EINVAL;
goto out;
}
err = ip6_build_xmit(sk, udpv6_getfrag, &udh, &fl, len, opt, hlimit, up->pending = 1;
msg->msg_flags);
do_append_data:
up->len += ulen;
err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, sizeof(struct udphdr),
hlimit, opt, &fl, (struct rt6_info*)dst,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
err = udp_v6_push_pending_frames(sk, up);
ip6_dst_store(sk, dst, fl.nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
if (err > 0)
err = np->recverr ? net_xmit_errno(err) : 0;
release_sock(sk);
out:
fl6_sock_release(flowlabel); fl6_sock_release(flowlabel);
if (saddr) kfree(saddr);
if (!err) {
UDP6_INC_STATS_USER(UdpOutDatagrams);
return len;
}
return err;
do_confirm:
dst_confirm(dst);
if (!(msg->msg_flags&MSG_PROBE) || len)
goto back_from_confirm;
err = 0;
goto out;
}
/*
 * UDPv6 socket destroy hook.
 *
 * Calls udp_v6_flush_pending_frames() with the socket lock held, then
 * passes the socket on to inet6_destroy_sock().  Always returns 0.
 */
static int udpv6_destroy_sock(struct sock *sk)
{
	/* The flush is bracketed by the socket lock. */
	lock_sock(sk);
	udp_v6_flush_pending_frames(sk);
	release_sock(sk);

	inet6_destroy_sock(sk);

	return 0;
}
/*
* Socket option code for UDP
*/
static int udpv6_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen)
{
struct udp_opt *up = udp_sk(sk);
int val;
int err = 0;
if (level != SOL_UDP)
return ipv6_setsockopt(sk, level, optname, optval, optlen);
if(optlen<sizeof(int))
return -EINVAL;
if (get_user(val, (int *)optval))
return -EFAULT;
switch(optname) {
case UDP_CORK:
if (val != 0) {
up->corkflag = 1;
} else {
up->corkflag = 0;
lock_sock(sk);
udp_v6_push_pending_frames(sk, up);
release_sock(sk);
}
break;
case UDP_ENCAP:
up->encap_type = val;
break;
default:
err = -ENOPROTOOPT;
break;
};
if (err < 0)
return err; return err;
}
UDP6_INC_STATS_USER(UdpOutDatagrams); static int udpv6_getsockopt(struct sock *sk, int level, int optname,
return ulen; char *optval, int *optlen)
{
struct udp_opt *up = udp_sk(sk);
int val, len;
if (level != SOL_UDP)
return ipv6_getsockopt(sk, level, optname, optval, optlen);
if(get_user(len,optlen))
return -EFAULT;
len = min_t(unsigned int, len, sizeof(int));
if(len < 0)
return -EINVAL;
switch(optname) {
case UDP_CORK:
val = up->corkflag;
break;
case UDP_ENCAP:
val = up->encap_type;
break;
default:
return -ENOPROTOOPT;
};
if(put_user(len, optlen))
return -EFAULT;
if(copy_to_user(optval, &val,len))
return -EFAULT;
return 0;
} }
static struct inet6_protocol udpv6_protocol = { static struct inet6_protocol udpv6_protocol = {
...@@ -1038,9 +1198,9 @@ struct proto udpv6_prot = { ...@@ -1038,9 +1198,9 @@ struct proto udpv6_prot = {
.connect = udpv6_connect, .connect = udpv6_connect,
.disconnect = udp_disconnect, .disconnect = udp_disconnect,
.ioctl = udp_ioctl, .ioctl = udp_ioctl,
.destroy = inet6_destroy_sock, .destroy = udpv6_destroy_sock,
.setsockopt = ipv6_setsockopt, .setsockopt = udpv6_setsockopt,
.getsockopt = ipv6_getsockopt, .getsockopt = udpv6_getsockopt,
.sendmsg = udpv6_sendmsg, .sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg, .recvmsg = udpv6_recvmsg,
.backlog_rcv = udpv6_queue_rcv_skb, .backlog_rcv = udpv6_queue_rcv_skb,
......
...@@ -1241,7 +1241,8 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, ...@@ -1241,7 +1241,8 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
} }
} }
if (x1 && x1->id.spi && hdr->sadb_msg_type == SADB_ADD) { if (x1 && ((x1->id.spi && hdr->sadb_msg_type == SADB_ADD) ||
(hdr->sadb_msg_type == SADB_UPDATE && xfrm_state_kern(x1)))) {
x->km.state = XFRM_STATE_DEAD; x->km.state = XFRM_STATE_DEAD;
xfrm_state_put(x); xfrm_state_put(x);
xfrm_state_put(x1); xfrm_state_put(x1);
...@@ -1286,6 +1287,11 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h ...@@ -1286,6 +1287,11 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
if (x == NULL) if (x == NULL)
return -ESRCH; return -ESRCH;
if (xfrm_state_kern(x)) {
xfrm_state_put(x);
return -EPERM;
}
xfrm_state_delete(x); xfrm_state_delete(x);
xfrm_state_put(x); xfrm_state_put(x);
......
...@@ -304,6 +304,7 @@ EXPORT_SYMBOL(xfrm_state_register_afinfo); ...@@ -304,6 +304,7 @@ EXPORT_SYMBOL(xfrm_state_register_afinfo);
EXPORT_SYMBOL(xfrm_state_unregister_afinfo); EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
EXPORT_SYMBOL(xfrm_state_get_afinfo); EXPORT_SYMBOL(xfrm_state_get_afinfo);
EXPORT_SYMBOL(xfrm_state_put_afinfo); EXPORT_SYMBOL(xfrm_state_put_afinfo);
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
EXPORT_SYMBOL(xfrm_replay_check); EXPORT_SYMBOL(xfrm_replay_check);
EXPORT_SYMBOL(xfrm_replay_advance); EXPORT_SYMBOL(xfrm_replay_advance);
EXPORT_SYMBOL(xfrm_check_selectors); EXPORT_SYMBOL(xfrm_check_selectors);
...@@ -466,6 +467,8 @@ EXPORT_SYMBOL(sysctl_tcp_tw_recycle); ...@@ -466,6 +467,8 @@ EXPORT_SYMBOL(sysctl_tcp_tw_recycle);
EXPORT_SYMBOL(sysctl_max_syn_backlog); EXPORT_SYMBOL(sysctl_max_syn_backlog);
#endif #endif
EXPORT_SYMBOL(ip_generic_getfrag);
#endif #endif
EXPORT_SYMBOL(tcp_read_sock); EXPORT_SYMBOL(tcp_read_sock);
...@@ -570,6 +573,7 @@ EXPORT_SYMBOL(netif_rx); ...@@ -570,6 +573,7 @@ EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(netif_receive_skb); EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(dev_add_pack); EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_remove_pack); EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_get); EXPORT_SYMBOL(dev_get);
EXPORT_SYMBOL(dev_alloc); EXPORT_SYMBOL(dev_alloc);
EXPORT_SYMBOL(dev_alloc_name); EXPORT_SYMBOL(dev_alloc_name);
......
...@@ -774,6 +774,7 @@ static int packet_release(struct socket *sock) ...@@ -774,6 +774,7 @@ static int packet_release(struct socket *sock)
*/ */
dev_remove_pack(&po->prot_hook); dev_remove_pack(&po->prot_hook);
po->running = 0; po->running = 0;
po->num = 0;
__sock_put(sk); __sock_put(sk);
} }
...@@ -819,9 +820,12 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol) ...@@ -819,9 +820,12 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
spin_lock(&po->bind_lock); spin_lock(&po->bind_lock);
if (po->running) { if (po->running) {
dev_remove_pack(&po->prot_hook);
__sock_put(sk); __sock_put(sk);
po->running = 0; po->running = 0;
po->num = 0;
spin_unlock(&po->bind_lock);
dev_remove_pack(&po->prot_hook);
spin_lock(&po->bind_lock);
} }
po->num = protocol; po->num = protocol;
...@@ -1374,7 +1378,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void ...@@ -1374,7 +1378,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
if (dev->ifindex == po->ifindex) { if (dev->ifindex == po->ifindex) {
spin_lock(&po->bind_lock); spin_lock(&po->bind_lock);
if (po->running) { if (po->running) {
dev_remove_pack(&po->prot_hook); __dev_remove_pack(&po->prot_hook);
__sock_put(sk); __sock_put(sk);
po->running = 0; po->running = 0;
sk->err = ENETDOWN; sk->err = ENETDOWN;
...@@ -1618,10 +1622,15 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing ...@@ -1618,10 +1622,15 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
/* Detach socket from network */ /* Detach socket from network */
spin_lock(&po->bind_lock); spin_lock(&po->bind_lock);
if (po->running) if (po->running) {
dev_remove_pack(&po->prot_hook); __dev_remove_pack(&po->prot_hook);
po->num = 0;
po->running = 0;
}
spin_unlock(&po->bind_lock); spin_unlock(&po->bind_lock);
synchronize_net();
err = -EBUSY; err = -EBUSY;
if (closing || atomic_read(&po->mapped) == 0) { if (closing || atomic_read(&po->mapped) == 0) {
err = 0; err = 0;
......
...@@ -222,11 +222,6 @@ used on the egress (might slow things for an iota) ...@@ -222,11 +222,6 @@ used on the egress (might slow things for an iota)
*/ */
if (dev->qdisc_ingress) { if (dev->qdisc_ingress) {
/* FIXME: Push down to ->enqueue functions --RR */
if (skb_is_nonlinear(*pskb)
&& skb_linearize(*pskb, GFP_ATOMIC) != 0)
return NF_DROP;
spin_lock(&dev->queue_lock); spin_lock(&dev->queue_lock);
if ((q = dev->qdisc_ingress) != NULL) if ((q = dev->qdisc_ingress) != NULL)
fwres = q->enqueue(skb, q); fwres = q->enqueue(skb, q);
......
...@@ -172,6 +172,7 @@ struct xfrm_state *xfrm_state_alloc(void) ...@@ -172,6 +172,7 @@ struct xfrm_state *xfrm_state_alloc(void)
if (x) { if (x) {
memset(x, 0, sizeof(struct xfrm_state)); memset(x, 0, sizeof(struct xfrm_state));
atomic_set(&x->refcnt, 1); atomic_set(&x->refcnt, 1);
atomic_set(&x->tunnel_users, 0);
INIT_LIST_HEAD(&x->bydst); INIT_LIST_HEAD(&x->bydst);
INIT_LIST_HEAD(&x->byspi); INIT_LIST_HEAD(&x->byspi);
init_timer(&x->timer); init_timer(&x->timer);
...@@ -234,6 +235,7 @@ static void __xfrm_state_delete(struct xfrm_state *x) ...@@ -234,6 +235,7 @@ static void __xfrm_state_delete(struct xfrm_state *x)
void xfrm_state_delete(struct xfrm_state *x) void xfrm_state_delete(struct xfrm_state *x)
{ {
xfrm_state_delete_tunnel(x);
spin_lock_bh(&x->lock); spin_lock_bh(&x->lock);
__xfrm_state_delete(x); __xfrm_state_delete(x);
spin_unlock_bh(&x->lock); spin_unlock_bh(&x->lock);
...@@ -248,7 +250,8 @@ void xfrm_state_flush(u8 proto) ...@@ -248,7 +250,8 @@ void xfrm_state_flush(u8 proto)
for (i = 0; i < XFRM_DST_HSIZE; i++) { for (i = 0; i < XFRM_DST_HSIZE; i++) {
restart: restart:
list_for_each_entry(x, xfrm_state_bydst+i, bydst) { list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) { if (!xfrm_state_kern(x) &&
(proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
xfrm_state_hold(x); xfrm_state_hold(x);
spin_unlock_bh(&xfrm_state_lock); spin_unlock_bh(&xfrm_state_lock);
...@@ -790,6 +793,20 @@ void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) ...@@ -790,6 +793,20 @@ void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
read_unlock(&afinfo->lock); read_unlock(&afinfo->lock);
} }
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/*
 * Detach @x from the tunnel state it points at, if it has one.
 *
 * If the tunnel's user count reads exactly 2, the tunnel state is deleted
 * first.  The count is then decremented, the reference on the tunnel is
 * put, and x->tunnel is cleared.
 * NOTE(review): the meaning of the value 2 is not visible here; presumably
 * it is the tunnel's own base count plus this last user -- confirm.
 */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	struct xfrm_state *tunnel = x->tunnel;

	if (tunnel == NULL)
		return;

	if (atomic_read(&tunnel->tunnel_users) == 2)
		xfrm_state_delete(tunnel);

	atomic_dec(&tunnel->tunnel_users);
	xfrm_state_put(tunnel);
	x->tunnel = NULL;
}
void __init xfrm_state_init(void) void __init xfrm_state_init(void)
{ {
int i; int i;
......
...@@ -281,6 +281,11 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) ...@@ -281,6 +281,11 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
if (x == NULL) if (x == NULL)
return -ESRCH; return -ESRCH;
if (xfrm_state_kern(x)) {
xfrm_state_put(x);
return -EPERM;
}
xfrm_state_delete(x); xfrm_state_delete(x);
xfrm_state_put(x); xfrm_state_put(x);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment