Commit 4ce937cb authored by Linus Torvalds

Merge master.kernel.org:/home/davem/BK/net-2.5

into home.transmeta.com:/home/torvalds/v2.5/linux
parents 80d504c8 ff5d22a4
......@@ -206,7 +206,8 @@ enum netdev_state_t
__LINK_STATE_START,
__LINK_STATE_PRESENT,
__LINK_STATE_SCHED,
__LINK_STATE_NOCARRIER,
__LINK_STATE_RX_SCHED
};
......@@ -330,6 +331,10 @@ struct net_device
void *ip6_ptr; /* IPv6 specific data */
void *ec_ptr; /* Econet specific data */
struct list_head poll_list; /* Link to poll list */
int quota;
int weight;
struct Qdisc *qdisc;
struct Qdisc *qdisc_sleeping;
struct Qdisc *qdisc_list;
......@@ -373,6 +378,7 @@ struct net_device
int (*stop)(struct net_device *dev);
int (*hard_start_xmit) (struct sk_buff *skb,
struct net_device *dev);
int (*poll) (struct net_device *dev, int *quota);
int (*hard_header) (struct sk_buff *skb,
struct net_device *dev,
unsigned short type,
......@@ -492,8 +498,11 @@ struct softnet_data
int cng_level;
int avg_blog;
struct sk_buff_head input_pkt_queue;
struct list_head poll_list;
struct net_device *output_queue;
struct sk_buff *completion_queue;
struct net_device backlog_dev; /* Sorry. 8) */
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
......@@ -547,6 +556,7 @@ static inline int netif_running(struct net_device *dev)
return test_bit(__LINK_STATE_START, &dev->state);
}
/* Use this variant when it is known for sure that it
* is executing from interrupt context.
*/
......@@ -578,6 +588,8 @@ static inline void dev_kfree_skb_any(struct sk_buff *skb)
extern void net_call_rx_atomic(void (*fn)(void));
#define HAVE_NETIF_RX 1
extern int netif_rx(struct sk_buff *skb);
#define HAVE_NETIF_RECEIVE_SKB 1
extern int netif_receive_skb(struct sk_buff *skb);
extern int dev_ioctl(unsigned int cmd, void *);
extern int dev_change_flags(struct net_device *, unsigned);
extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
......@@ -695,6 +707,78 @@ enum {
#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS)
#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA)
/* Schedule rx intr now? */
static inline int netif_rx_schedule_prep(struct net_device *dev)
{
return netif_running(dev) &&
!test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
}
/* Add interface to tail of rx poll list. This assumes that _prep has
* already been called and returned 1.
*/
static inline void __netif_rx_schedule(struct net_device *dev)
{
unsigned long flags;
int cpu = smp_processor_id();
local_irq_save(flags);
dev_hold(dev);
list_add_tail(&dev->poll_list, &softnet_data[cpu].poll_list);
if (dev->quota < 0)
dev->quota += dev->weight;
else
dev->quota = dev->weight;
__cpu_raise_softirq(cpu, NET_RX_SOFTIRQ);
local_irq_restore(flags);
}
/* Try to reschedule poll. Called by irq handler. */
static inline void netif_rx_schedule(struct net_device *dev)
{
if (netif_rx_schedule_prep(dev))
__netif_rx_schedule(dev);
}
/* Try to reschedule poll. Called by dev->poll() after netif_rx_complete().
* Do not inline this?
*/
static inline int netif_rx_reschedule(struct net_device *dev, int undo)
{
if (netif_rx_schedule_prep(dev)) {
unsigned long flags;
int cpu = smp_processor_id();
dev->quota += undo;
local_irq_save(flags);
list_add_tail(&dev->poll_list, &softnet_data[cpu].poll_list);
__cpu_raise_softirq(cpu, NET_RX_SOFTIRQ);
local_irq_restore(flags);
return 1;
}
return 0;
}
/* Remove interface from poll list: it must be in the poll list
 * on the current cpu. This primitive is called by dev->poll() when
 * it completes its work. The device cannot be off the poll list at this
 * moment; that would be a BUG().
*/
static inline void netif_rx_complete(struct net_device *dev)
{
unsigned long flags;
local_irq_save(flags);
if (!test_bit(__LINK_STATE_RX_SCHED, &dev->state)) BUG();
list_del(&dev->poll_list);
clear_bit(__LINK_STATE_RX_SCHED, &dev->state);
local_irq_restore(flags);
}
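Taken together, these inlines define the new rx contract: the interrupt handler only marks the device for polling, and NET_RX_SOFTIRQ later drains the hardware through dev->poll() within the assigned quota. Below is a minimal sketch of how a driver might wire this up; the mydev_* helpers are hypothetical stand-ins for hardware-specific code, not part of this patch.

/* Hypothetical driver glue for the polling API above. mydev_rx_pending(),
 * mydev_disable_rx_intr(), mydev_enable_rx_intr() and mydev_rx_one() are
 * assumed helpers, not real kernel API.
 */
static void mydev_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	struct net_device *dev = dev_id;

	if (mydev_rx_pending(dev) && netif_rx_schedule_prep(dev)) {
		/* Mask further rx interrupts; the poll loop takes over. */
		mydev_disable_rx_intr(dev);
		__netif_rx_schedule(dev);
	}
}

static int mydev_poll(struct net_device *dev, int *budget)
{
	int work = 0;
	int quota = min(dev->quota, *budget);

	while (work < quota) {
		struct sk_buff *skb = mydev_rx_one(dev);

		if (skb == NULL) {
			/* Ring empty: leave the poll list, re-arm the irq. */
			netif_rx_complete(dev);
			mydev_enable_rx_intr(dev);
			dev->quota -= work;
			*budget -= work;
			return 0;
		}
		netif_receive_skb(skb);
		work++;
	}

	/* Quota exhausted: stay on the poll list; net_rx_action() will
	 * refill dev->quota from dev->weight and call us again.
	 */
	dev->quota -= work;
	*budget -= work;
	return 1;
}

The return convention matches process_backlog() and net_rx_action() in the dev.c changes further down: nonzero keeps the device scheduled, zero means the poll method already called netif_rx_complete() itself.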
/* These functions live elsewhere (drivers/net/net_init.c, but related) */
extern void ether_setup(struct net_device *dev);
......@@ -719,6 +803,7 @@ extern void dev_mcast_init(void);
extern int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev));
extern void netdev_unregister_fc(int bit);
extern int netdev_max_backlog;
extern int weight_p;
extern unsigned long netdev_fc_xoff;
extern atomic_t netdev_dropping;
extern int netdev_set_master(struct net_device *dev, struct net_device *master);
......
#ifndef __LINUX_ARP_NETFILTER_H
#define __LINUX_ARP_NETFILTER_H
/* ARP-specific defines for netfilter.
* (C)2002 Rusty Russell IBM -- This code is GPL.
*/
#include <linux/config.h>
#include <linux/netfilter.h>
/* There is no PF_ARP. */
#define NF_ARP 0
/* ARP Hooks */
#define NF_ARP_IN 0
#define NF_ARP_OUT 1
#define NF_ARP_NUMHOOKS 2
#endif /* __LINUX_ARP_NETFILTER_H */
/*
* Format of an ARP firewall descriptor
*
* src, tgt, src_mask, tgt_mask, arpop, arpop_mask are always stored in
* network byte order.
* flags are stored in host byte order (of course).
*/
#ifndef _ARPTABLES_H
#define _ARPTABLES_H
#ifdef __KERNEL__
#include <linux/if.h>
#include <linux/types.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#endif
#include <linux/netfilter_arp.h>
#define ARPT_FUNCTION_MAXNAMELEN 30
#define ARPT_TABLE_MAXNAMELEN 32
#define ARPT_DEV_ADDR_LEN_MAX 16
struct arpt_devaddr_info {
char addr[ARPT_DEV_ADDR_LEN_MAX];
char mask[ARPT_DEV_ADDR_LEN_MAX];
};
/* Yes, Virginia, you have to zero the padding. */
struct arpt_arp {
/* Source and target IP addr */
struct in_addr src, tgt;
/* Mask for src and target IP addr */
struct in_addr smsk, tmsk;
/* Device hw address length, src+target device addresses */
u_int8_t arhln, arhln_mask;
struct arpt_devaddr_info src_devaddr;
struct arpt_devaddr_info tgt_devaddr;
/* ARP operation code. */
u_int16_t arpop, arpop_mask;
/* ARP hardware address and protocol address format. */
u_int16_t arhrd, arhrd_mask;
u_int16_t arpro, arpro_mask;
/* The protocol address length is only accepted if it is 4
* so there is no use in offering a way to do filtering on it.
*/
char iniface[IFNAMSIZ], outiface[IFNAMSIZ];
unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ];
/* Flags word */
u_int8_t flags;
/* Inverse flags */
u_int16_t invflags;
};
struct arpt_entry_target
{
union {
struct {
u_int16_t target_size;
/* Used by userspace */
char name[ARPT_FUNCTION_MAXNAMELEN];
} user;
struct {
u_int16_t target_size;
/* Used inside the kernel */
struct arpt_target *target;
} kernel;
/* Total length */
u_int16_t target_size;
} u;
unsigned char data[0];
};
struct arpt_standard_target
{
struct arpt_entry_target target;
int verdict;
};
struct arpt_counters
{
u_int64_t pcnt, bcnt; /* Packet and byte counters */
};
/* Values for "flag" field in struct arpt_arp (general arp structure).
* No flags defined yet.
*/
#define ARPT_F_MASK 0x00 /* All possible flag bits mask. */
/* Values for "inv" field in struct arpt_arp. */
#define ARPT_INV_VIA_IN 0x0001 /* Invert the sense of IN IFACE. */
#define ARPT_INV_VIA_OUT 0x0002 /* Invert the sense of OUT IFACE */
#define ARPT_INV_SRCIP 0x0004 /* Invert the sense of SRC IP. */
#define ARPT_INV_TGTIP 0x0008 /* Invert the sense of TGT IP. */
#define ARPT_INV_SRCDEVADDR 0x0010 /* Invert the sense of SRC DEV ADDR. */
#define ARPT_INV_TGTDEVADDR 0x0020 /* Invert the sense of TGT DEV ADDR. */
#define ARPT_INV_ARPOP 0x0040 /* Invert the sense of ARP OP. */
#define ARPT_INV_ARPHRD 0x0080 /* Invert the sense of ARP HRD. */
#define ARPT_INV_ARPPRO 0x0100 /* Invert the sense of ARP PRO. */
#define ARPT_INV_ARPHLN 0x0200 /* Invert the sense of ARP HLN. */
#define ARPT_INV_MASK 0x007F /* All possible flag bits mask. */
/* This structure defines each of the firewall rules. Consists of 3
parts which are 1) general ARP header stuff 2) match specific
stuff 3) the target to perform if the rule matches */
struct arpt_entry
{
struct arpt_arp arp;
/* Size of arpt_entry + matches */
u_int16_t target_offset;
/* Size of arpt_entry + matches + target */
u_int16_t next_offset;
/* Back pointer */
unsigned int comefrom;
/* Packet and byte counters. */
struct arpt_counters counters;
/* The matches (if any), then the target. */
unsigned char elems[0];
};
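Each rule is a variable-length record: the fixed arpt_arp match is followed by the target, with target_offset and next_offset measured from the start of the entry. Roughly, as an illustrative sketch of the layout (not code from this patch):

/* Layout of one rule in memory; offsets are relative to the entry start:
 *
 *   0                   target_offset             next_offset
 *   +-------------------+--------------------------+
 *   | struct arpt_entry | struct arpt_entry_target |
 *   | (incl. arpt_arp)  | (+ target-specific data) |
 *   +-------------------+--------------------------+
 *
 * arpt_get_target() below just adds target_offset to the entry pointer,
 * and ARPT_ENTRY_ITERATE() advances by next_offset.
 */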
/*
* New IP firewall options for [gs]etsockopt at the RAW IP level.
* Unlike BSD, Linux inherits IP options, so you don't have to use a raw
* socket for this. Instead we check rights in the calls.
*/
#define ARPT_BASE_CTL 96 /* base for firewall socket options */
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_SET_ADD_COUNTERS (ARPT_BASE_CTL + 1)
#define ARPT_SO_SET_MAX ARPT_SO_SET_ADD_COUNTERS
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)
#define ARPT_SO_GET_MAX ARPT_SO_GET_ENTRIES
/* CONTINUE verdict for targets */
#define ARPT_CONTINUE 0xFFFFFFFF
/* For standard target */
#define ARPT_RETURN (-NF_MAX_VERDICT - 1)
/* The argument to ARPT_SO_GET_INFO */
struct arpt_getinfo
{
/* Which table: caller fills this in. */
char name[ARPT_TABLE_MAXNAMELEN];
/* Kernel fills these in. */
/* Which hook entry points are valid: bitmask */
unsigned int valid_hooks;
/* Hook entry points: one per netfilter hook. */
unsigned int hook_entry[NF_ARP_NUMHOOKS];
/* Underflow points. */
unsigned int underflow[NF_ARP_NUMHOOKS];
/* Number of entries */
unsigned int num_entries;
/* Size of entries. */
unsigned int size;
};
/* The argument to ARPT_SO_SET_REPLACE. */
struct arpt_replace
{
/* Which table. */
char name[ARPT_TABLE_MAXNAMELEN];
/* Which hook entry points are valid: bitmask. You can't
change this. */
unsigned int valid_hooks;
/* Number of entries */
unsigned int num_entries;
/* Total size of new entries */
unsigned int size;
/* Hook entry points. */
unsigned int hook_entry[NF_ARP_NUMHOOKS];
/* Underflow points. */
unsigned int underflow[NF_ARP_NUMHOOKS];
/* Information about old entries: */
/* Number of counters (must be equal to current number of entries). */
unsigned int num_counters;
/* The old entries' counters. */
struct arpt_counters *counters;
/* The entries (hang off end: not really an array). */
struct arpt_entry entries[0];
};
/* The argument to ARPT_SO_SET_ADD_COUNTERS. */
struct arpt_counters_info
{
/* Which table. */
char name[ARPT_TABLE_MAXNAMELEN];
unsigned int num_counters;
/* The counters (actually `number' of these). */
struct arpt_counters counters[0];
};
/* The argument to ARPT_SO_GET_ENTRIES. */
struct arpt_get_entries
{
/* Which table: user fills this in. */
char name[ARPT_TABLE_MAXNAMELEN];
/* User fills this in: total entry size. */
unsigned int size;
/* The entries. */
struct arpt_entry entrytable[0];
};
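These sockopts mirror the iptables get/set interface. Purely as an illustration, a userspace program could query table metadata roughly as below; error handling is elided, CAP_NET_ADMIN is required, and the assumption that the options are served at the IPPROTO_IP level, like their iptables counterparts, is mine.

/* Hypothetical userspace probe of the "filter" ARP table. */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/netfilter_arp/arp_tables.h>

int main(void)
{
	struct arpt_getinfo info;
	socklen_t len = sizeof(info);
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

	if (fd < 0)
		return 1;
	memset(&info, 0, sizeof(info));
	strcpy(info.name, "filter");
	if (getsockopt(fd, IPPROTO_IP, ARPT_SO_GET_INFO, &info, &len) == 0)
		printf("%u entries, %u bytes, valid hooks 0x%x\n",
		       info.num_entries, info.size, info.valid_hooks);
	return 0;
}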
/* Standard return verdict, or do jump. */
#define ARPT_STANDARD_TARGET ""
/* Error verdict. */
#define ARPT_ERROR_TARGET "ERROR"
/* Helper functions */
static __inline__ struct arpt_entry_target *arpt_get_target(struct arpt_entry *e)
{
return (void *)e + e->target_offset;
}
/* fn returns 0 to continue iteration */
#define ARPT_ENTRY_ITERATE(entries, size, fn, args...) \
({ \
unsigned int __i; \
int __ret = 0; \
struct arpt_entry *__entry; \
\
for (__i = 0; __i < (size); __i += __entry->next_offset) { \
__entry = (void *)(entries) + __i; \
\
__ret = fn(__entry , ## args); \
if (__ret != 0) \
break; \
} \
__ret; \
})
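Since entries are chained by next_offset rather than stored in a fixed-stride array, any walk over a table goes through this macro. A minimal sketch of a callback follows (illustrative only; returning nonzero would stop the walk early):

/* Count the rules in a table blob of `size' bytes starting at `entries'. */
static inline int count_entry(struct arpt_entry *e, unsigned int *count)
{
	(*count)++;
	return 0;	/* 0 == keep iterating */
}

/* Usage:
 *	unsigned int n = 0;
 *	ARPT_ENTRY_ITERATE(entries, size, count_entry, &n);
 */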
/*
* Main firewall chains definitions and global var's definitions.
*/
#ifdef __KERNEL__
/* Registration hooks for targets. */
struct arpt_target
{
struct list_head list;
const char name[ARPT_FUNCTION_MAXNAMELEN];
/* Returns verdict. */
unsigned int (*target)(struct sk_buff **pskb,
unsigned int hooknum,
const struct net_device *in,
const struct net_device *out,
const void *targinfo,
void *userdata);
/* Called when user tries to insert an entry of this type:
hook_mask is a bitmask of hooks from which it can be
called. */
/* Should return true or false. */
int (*checkentry)(const char *tablename,
const struct arpt_entry *e,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask);
/* Called when entry of this type deleted. */
void (*destroy)(void *targinfo, unsigned int targinfosize);
/* Set this to THIS_MODULE if you are a module, otherwise NULL */
struct module *me;
};
extern int arpt_register_target(struct arpt_target *target);
extern void arpt_unregister_target(struct arpt_target *target);
/* Furniture shopping... */
struct arpt_table
{
struct list_head list;
/* A unique name... */
char name[ARPT_TABLE_MAXNAMELEN];
/* Seed table: copied in register_table */
struct arpt_replace *table;
/* What hooks you will enter on */
unsigned int valid_hooks;
/* Lock for the curtain */
rwlock_t lock;
/* Man behind the curtain... */
struct arpt_table_info *private;
/* Set this to THIS_MODULE if you are a module, otherwise NULL */
struct module *me;
};
extern int arpt_register_table(struct arpt_table *table);
extern void arpt_unregister_table(struct arpt_table *table);
extern unsigned int arpt_do_table(struct sk_buff **pskb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
struct arpt_table *table,
void *userdata);
#define ARPT_ALIGN(s) (((s) + (__alignof__(struct arpt_entry)-1)) & ~(__alignof__(struct arpt_entry)-1))
#endif /*__KERNEL__*/
#endif /* _ARPTABLES_H */
......@@ -11,8 +11,13 @@ enum ip_nat_manip_type
IP_NAT_MANIP_DST
};
#ifndef CONFIG_IP_NF_NAT_LOCAL
/* SRC manip occurs only on POST_ROUTING */
#define HOOK2MANIP(hooknum) ((hooknum) != NF_IP_POST_ROUTING)
#else
/* SRC manip occurs POST_ROUTING or LOCAL_IN */
#define HOOK2MANIP(hooknum) ((hooknum) != NF_IP_POST_ROUTING && (hooknum) != NF_IP_LOCAL_IN)
#endif
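The trick here is that IP_NAT_MANIP_SRC is the first enumerator, so the comparison's 0/1 result is itself the manip type: every hook other than the one(s) where source manipulation happens maps to IP_NAT_MANIP_DST. Spelled out for the non-local case (a worked example, not code from the patch):

/* With CONFIG_IP_NF_NAT_LOCAL unset:
 *	HOOK2MANIP(NF_IP_POST_ROUTING) == 0 == IP_NAT_MANIP_SRC
 *	HOOK2MANIP(NF_IP_PRE_ROUTING)  == 1 == IP_NAT_MANIP_DST
 *	HOOK2MANIP(NF_IP_LOCAL_OUT)    == 1 == IP_NAT_MANIP_DST
 */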
/* 2.3.19 (I hope) will define this in linux/netfilter_ipv4.h. */
#ifndef SO_ORIGINAL_DST
......
......@@ -202,7 +202,8 @@ enum
NET_CORE_NO_CONG_THRESH=13,
NET_CORE_NO_CONG=14,
NET_CORE_LO_CONG=15,
NET_CORE_MOD_CONG=16,
NET_CORE_DEV_WEIGHT=17
};
/* /proc/sys/net/ethernet */
......
......@@ -798,6 +798,19 @@ int dev_close(struct net_device *dev)
clear_bit(__LINK_STATE_START, &dev->state);
/* Synchronize to scheduled poll. We cannot touch the poll list,
 * as it can even be on a different cpu. So just clear netif_running()
 * and wait until the poll really happens. Actually, the best place
 * for this is inside dev->stop() after the device has stopped its irq
 * engine, but that requires more changes in devices. */
smp_mb__after_clear_bit(); /* Commit netif_running(). */
while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
/* No hurry. */
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(1);
}
/*
* Call the device specific close. This cannot fail.
* Only if device is UP
......@@ -1072,6 +1085,7 @@ int dev_queue_xmit(struct sk_buff *skb)
=======================================================================*/
int netdev_max_backlog = 300;
int weight_p = 64; /* old backlog weight */
/* These numbers are selected based on intuition and some
* experimentation; if you have a more scientific way of doing this,
* please go ahead and fix things.
......@@ -1237,13 +1251,11 @@ int netif_rx(struct sk_buff *skb)
enqueue:
dev_hold(skb->dev);
__skb_queue_tail(&queue->input_pkt_queue,skb);
local_irq_restore(flags);
#ifndef OFFLINE_SAMPLE
get_sample_stats(this_cpu);
#endif
return queue->cng_level;
}
if (queue->throttle) {
......@@ -1253,6 +1265,8 @@ int netif_rx(struct sk_buff *skb)
netdev_wakeup();
#endif
}
netif_rx_schedule(&queue->backlog_dev);
goto enqueue;
}
......@@ -1308,19 +1322,12 @@ static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int
return ret;
}
static __inline__ void skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
if (dev->master)
skb->dev = dev->master;
}
static void net_tx_action(struct softirq_action *h)
......@@ -1416,55 +1423,37 @@ static inline void handle_diverter(struct sk_buff *skb)
}
#endif /* CONFIG_NET_DIVERT */
int netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
int ret = NET_RX_DROP;
unsigned short type = skb->protocol;
if (skb->stamp.tv_sec == 0)
do_gettimeofday(&skb->stamp);
skb_bond(skb);
netdev_rx_stat[smp_processor_id()].total++;
#ifdef CONFIG_NET_FASTROUTE
if (skb->pkt_type == PACKET_FASTROUTE) {
netdev_rx_stat[smp_processor_id()].fastroute_deferred_out++;
return dev_queue_xmit(skb);
}
#endif
skb->h.raw = skb->nh.raw = skb->data;
pt_prev = NULL;
for (ptype = ptype_all; ptype; ptype = ptype->next) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev) {
if (!pt_prev->data) {
ret = deliver_to_old_ones(pt_prev, skb, 0);
} else {
atomic_inc(&skb->users);
ret = pt_prev->func(skb, skb->dev, pt_prev);
}
}
pt_prev = ptype;
......@@ -1473,16 +1462,13 @@ static void net_rx_action(struct softirq_action *h)
#ifdef CONFIG_NET_DIVERT
if (skb->dev->divert && skb->dev->divert->divert)
ret = handle_diverter(skb);
#endif /* CONFIG_NET_DIVERT */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
if (skb->dev->br_port != NULL &&
br_handle_frame_hook != NULL) {
return handle_bridge(skb, pt_prev);
}
#endif
......@@ -1490,13 +1476,11 @@ static void net_rx_action(struct softirq_action *h)
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev) {
if (!pt_prev->data) {
ret = deliver_to_old_ones(pt_prev, skb, 0);
} else {
atomic_inc(&skb->users);
ret = pt_prev->func(skb, skb->dev, pt_prev);
}
}
pt_prev = ptype;
......@@ -1504,33 +1488,73 @@ static void net_rx_action(struct softirq_action *h)
}
if (pt_prev) {
if (!pt_prev->data) {
ret = deliver_to_old_ones(pt_prev, skb, 1);
} else {
ret = pt_prev->func(skb, skb->dev, pt_prev);
}
} else {
kfree_skb(skb);
/* Jamal, now you will not be able to escape explaining
 * to me how you were going to use this. :-)
 */
ret = NET_RX_DROP;
}
return ret;
}
static int process_backlog(struct net_device *backlog_dev, int *budget)
{
int work = 0;
int quota = min(backlog_dev->quota, *budget);
int this_cpu = smp_processor_id();
struct softnet_data *queue = &softnet_data[this_cpu];
unsigned long start_time = jiffies;
for (;;) {
struct sk_buff *skb;
struct net_device *dev;
local_irq_disable();
skb = __skb_dequeue(&queue->input_pkt_queue);
if (skb == NULL)
goto job_done;
local_irq_enable();
dev = skb->dev;
netif_receive_skb(skb);
dev_put(dev);
work++;
if (work >= quota || jiffies - start_time > 1)
break;
#ifdef CONFIG_NET_HW_FLOWCONTROL
if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
if (atomic_dec_and_test(&netdev_dropping)) {
queue->throttle = 0;
netdev_wakeup();
break;
}
}
#endif
}
local_irq_disable();
backlog_dev->quota -= work;
*budget -= work;
return -1;
job_done:
backlog_dev->quota -= work;
*budget -= work;
list_del(&backlog_dev->poll_list);
clear_bit(__LINK_STATE_RX_SCHED, &backlog_dev->state);
if (queue->throttle) {
queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
......@@ -1539,21 +1563,53 @@ static void net_rx_action(struct softirq_action *h)
#endif
}
local_irq_enable();
return 0;
}
static void net_rx_action(struct softirq_action *h)
{
int this_cpu = smp_processor_id();
struct softnet_data *queue = &softnet_data[this_cpu];
unsigned long start_time = jiffies;
int budget = netdev_max_backlog;
br_read_lock(BR_NETPROTO_LOCK);
local_irq_disable();
while (!list_empty(&queue->poll_list)) {
struct net_device *dev;
if (budget <= 0 || jiffies - start_time > 1)
goto softnet_break;
local_irq_enable();
dev = list_entry(queue->poll_list.next, struct net_device, poll_list);
if (dev->quota <= 0 || dev->poll(dev, &budget)) {
local_irq_disable();
list_del(&dev->poll_list);
list_add_tail(&dev->poll_list, &queue->poll_list);
if (dev->quota < 0)
dev->quota += dev->weight;
else
dev->quota = dev->weight;
} else {
dev_put(dev);
local_irq_disable();
}
}
local_irq_enable();
br_read_unlock(BR_NETPROTO_LOCK);
return;
softnet_break:
netdev_rx_stat[this_cpu].time_squeeze++;
__cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
local_irq_enable();
br_read_unlock(BR_NETPROTO_LOCK);
}
static gifconf_func_t * gifconf_list [NPROTO];
......@@ -2626,6 +2682,7 @@ int __init net_dev_init(void)
if (!dev_boot_phase)
return 0;
#ifdef CONFIG_NET_DIVERT
dv_init();
#endif /* CONFIG_NET_DIVERT */
......@@ -2643,6 +2700,11 @@ int __init net_dev_init(void)
queue->cng_level = 0;
queue->avg_blog = 10; /* arbitrary non-zero */
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
queue->backlog_dev.weight = weight_p;
queue->backlog_dev.poll = process_backlog;
atomic_set(&queue->backlog_dev.refcnt, 1);
}
#ifdef CONFIG_NET_PROFILE
......@@ -2744,7 +2806,6 @@ int __init net_dev_init(void)
#ifdef CONFIG_NET_SCHED
pktsched_init();
#endif
/*
* Initialise network devices
*/
......
......@@ -12,6 +12,7 @@
#ifdef CONFIG_SYSCTL
extern int netdev_max_backlog;
extern int weight_p;
extern int no_cong_thresh;
extern int no_cong;
extern int lo_cong;
......@@ -47,6 +48,9 @@ ctl_table core_table[] = {
{NET_CORE_RMEM_DEFAULT, "rmem_default",
&sysctl_rmem_default, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_DEV_WEIGHT, "dev_weight",
&weight_p, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_CORE_MAX_BACKLOG, "netdev_max_backlog",
&netdev_max_backlog, sizeof(int), 0644, NULL,
&proc_dointvec},
......
......@@ -112,7 +112,7 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/netfilter_arp.h>
/*
* Interface to generic neighbour cache.
......@@ -561,7 +561,8 @@ void arp_send(int type, int ptype, u32 dest_ip,
arp_ptr+=dev->addr_len;
memcpy(arp_ptr, &dest_ip, 4);
dev_queue_xmit(skb);
/* Send it off, maybe filter it using firewalling first. */
NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, dev, dev_queue_xmit);
return;
out:
......@@ -574,45 +575,31 @@ static void parp_redo(struct sk_buff *skb)
}
/*
* Process an arp request.
*/
int arp_process(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct in_device *in_dev = in_dev_get(dev);
struct arphdr *arp;
unsigned char *arp_ptr;
struct rtable *rt;
unsigned char *sha, *tha;
u32 sip, tip;
u16 dev_type = dev->type;
int addr_type;
struct neighbour *n;
/* arp_rcv below verifies the ARP header, verifies the device
* is ARP'able, and linearizes the SKB (if needed).
*/
if (in_dev == NULL)
goto out;
arp = skb->nh.arph;
arp_ptr = (unsigned char *)(arp+1);
switch (dev_type) {
default:
......@@ -827,13 +814,41 @@ int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
out:
if (in_dev)
in_dev_put(in_dev);
kfree_skb(skb);
return 0;
}
/*
* Receive an arp request from the device layer.
*/
int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
{
struct arphdr *arp = skb->nh.arph;
if (arp->ar_hln != dev->addr_len ||
dev->flags & IFF_NOARP ||
skb->pkt_type == PACKET_OTHERHOST ||
skb->pkt_type == PACKET_LOOPBACK ||
arp->ar_pln != 4)
goto freeskb;
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
goto out_of_mem;
if (skb_is_nonlinear(skb)) {
if (skb_linearize(skb, GFP_ATOMIC) != 0)
goto freeskb;
}
return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
freeskb:
kfree_skb(skb);
out_of_mem:
return 0;
}
/*
* User level interface (ioctl, /proc)
......
......@@ -47,6 +47,7 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
define_bool CONFIG_IP_NF_NAT_NEEDED y
dep_tristate ' MASQUERADE target support' CONFIG_IP_NF_TARGET_MASQUERADE $CONFIG_IP_NF_NAT
dep_tristate ' REDIRECT target support' CONFIG_IP_NF_TARGET_REDIRECT $CONFIG_IP_NF_NAT
bool ' NAT of local connections' CONFIG_IP_NF_NAT_LOCAL
if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
dep_tristate ' Basic SNMP-ALG support (EXPERIMENTAL)' CONFIG_IP_NF_NAT_SNMP_BASIC $CONFIG_IP_NF_NAT
fi
......@@ -79,6 +80,11 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
dep_tristate ' TCPMSS target support' CONFIG_IP_NF_TARGET_TCPMSS $CONFIG_IP_NF_IPTABLES
fi
tristate 'ARP tables support' CONFIG_IP_NF_ARPTABLES
if [ "$CONFIG_IP_NF_ARPTABLES" != "n" ]; then
dep_tristate ' ARP packet filtering' CONFIG_IP_NF_ARPFILTER $CONFIG_IP_NF_ARPTABLES
fi
# Backwards compatibility modules: only if you don't build in the others.
if [ "$CONFIG_IP_NF_CONNTRACK" != "y" ]; then
if [ "$CONFIG_IP_NF_IPTABLES" != "y" ]; then
......
......@@ -9,7 +9,7 @@
O_TARGET := netfilter.o
export-objs = ip_conntrack_standalone.o ip_conntrack_ftp.o ip_fw_compat.o ip_nat_standalone.o ip_tables.o arp_tables.o
# Multipart objects.
list-multi := ip_conntrack.o iptable_nat.o ipfwadm.o ipchains.o
......@@ -75,6 +75,12 @@ obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
# generic ARP tables
obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
# just filtering instance of ARP tables for now
obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
# backwards compatibility
obj-$(CONFIG_IP_NF_COMPAT_IPCHAINS) += ipchains.o
obj-$(CONFIG_IP_NF_COMPAT_IPFWADM) += ipfwadm.o
......
/*
* Filtering ARP tables module.
*
* Copyright (C) 2002 David S. Miller (davem@redhat.com)
*
*/
#include <linux/module.h>
#include <linux/netfilter_arp/arp_tables.h>
#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT))
/* Standard entry. */
struct arpt_standard
{
struct arpt_entry entry;
struct arpt_standard_target target;
};
struct arpt_error_target
{
struct arpt_entry_target target;
char errorname[ARPT_FUNCTION_MAXNAMELEN];
};
struct arpt_error
{
struct arpt_entry entry;
struct arpt_error_target target;
};
static struct
{
struct arpt_replace repl;
struct arpt_standard entries[2];
struct arpt_error term;
} initial_table __initdata
= { { "filter", FILTER_VALID_HOOKS, 3,
sizeof(struct arpt_standard) * 2 + sizeof(struct arpt_error),
{ [NF_ARP_IN] 0,
[NF_ARP_OUT] sizeof(struct arpt_standard) },
{ [NF_ARP_IN] 0,
[NF_ARP_OUT] sizeof(struct arpt_standard), },
0, NULL, { } },
{
/* ARP_IN */
{
{
{
{ 0 }, { 0 }, { 0 }, { 0 },
0, 0,
{ { 0, }, { 0, } },
{ { 0, }, { 0, } },
0, 0,
0, 0,
0, 0,
"", "", { 0 }, { 0 },
0, 0
},
sizeof(struct arpt_entry),
sizeof(struct arpt_standard),
0,
{ 0, 0 }, { } },
{ { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } },
-NF_ACCEPT - 1 }
},
/* ARP_OUT */
{
{
{
{ 0 }, { 0 }, { 0 }, { 0 },
0, 0,
{ { 0, }, { 0, } },
{ { 0, }, { 0, } },
0, 0,
0, 0,
0, 0,
"", "", { 0 }, { 0 },
0, 0
},
sizeof(struct arpt_entry),
sizeof(struct arpt_standard),
0,
{ 0, 0 }, { } },
{ { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } },
-NF_ACCEPT - 1 }
}
},
/* ERROR */
{
{
{
{ 0 }, { 0 }, { 0 }, { 0 },
0, 0,
{ { 0, }, { 0, } },
{ { 0, }, { 0, } },
0, 0,
0, 0,
0, 0,
"", "", { 0 }, { 0 },
0, 0
},
sizeof(struct arpt_entry),
sizeof(struct arpt_error),
0,
{ 0, 0 }, { } },
{ { { { ARPT_ALIGN(sizeof(struct arpt_error_target)), ARPT_ERROR_TARGET } },
{ } },
"ERROR"
}
}
};
static struct arpt_table packet_filter
= { { NULL, NULL }, "filter", &initial_table.repl,
FILTER_VALID_HOOKS, RW_LOCK_UNLOCKED, NULL, THIS_MODULE };
/* The work comes in here from netfilter.c */
static unsigned int arpt_hook(unsigned int hook,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL);
}
static struct nf_hook_ops arpt_ops[]
= { { { NULL, NULL }, arpt_hook, NF_ARP, NF_ARP_IN, 0 },
{ { NULL, NULL }, arpt_hook, NF_ARP, NF_ARP_OUT, 0 }
};
static int __init init(void)
{
int ret;
/* Register table */
ret = arpt_register_table(&packet_filter);
if (ret < 0)
return ret;
/* Register hooks */
ret = nf_register_hook(&arpt_ops[0]);
if (ret < 0)
goto cleanup_table;
ret = nf_register_hook(&arpt_ops[1]);
if (ret < 0)
goto cleanup_hook0;
return ret;
cleanup_hook0:
nf_unregister_hook(&arpt_ops[0]);
cleanup_table:
arpt_unregister_table(&packet_filter);
return ret;
}
static void __exit fini(void)
{
unsigned int i;
for (i = 0; i < sizeof(arpt_ops)/sizeof(struct nf_hook_ops); i++)
nf_unregister_hook(&arpt_ops[i]);
arpt_unregister_table(&packet_filter);
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
......@@ -15,6 +15,7 @@
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/version.h>
#include <linux/brlock.h>
#include <net/checksum.h>
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
......@@ -35,6 +36,11 @@
struct module *ip_conntrack_module = THIS_MODULE;
MODULE_LICENSE("GPL");
static int kill_proto(const struct ip_conntrack *i, void *data)
{
return (i->tuplehash[IP_CT_DIR_ORIGINAL].dst.protonum ==
*((u_int8_t *) data));
}
static unsigned int
print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple,
......@@ -304,12 +310,24 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
return ret;
}
void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
{
WRITE_LOCK(&ip_conntrack_lock);
/* find_proto() returns proto_generic in case there is no protocol
* helper. So this should be enough - HW */
LIST_DELETE(&protocol_list, proto);
WRITE_UNLOCK(&ip_conntrack_lock);
/* Somebody could be still looking at the proto in bh. */
br_write_lock_bh(BR_NETPROTO_LOCK);
br_write_unlock_bh(BR_NETPROTO_LOCK);
/* Remove all contrack entries for this protocol */
ip_ct_selective_cleanup(kill_proto, &proto->proto);
MOD_DEC_USE_COUNT;
}
static int __init init(void)
{
......@@ -325,6 +343,7 @@ module_init(init);
module_exit(fini);
EXPORT_SYMBOL(ip_conntrack_protocol_register);
EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
EXPORT_SYMBOL(invert_tuplepr);
EXPORT_SYMBOL(ip_conntrack_alter_reply);
EXPORT_SYMBOL(ip_conntrack_destroyed);
......@@ -335,6 +354,7 @@ EXPORT_SYMBOL(ip_conntrack_helper_unregister);
EXPORT_SYMBOL(ip_ct_selective_cleanup);
EXPORT_SYMBOL(ip_ct_refresh);
EXPORT_SYMBOL(ip_conntrack_expect_related);
EXPORT_SYMBOL(ip_conntrack_unexpect_related);
EXPORT_SYMBOL(ip_conntrack_tuple_taken);
EXPORT_SYMBOL(ip_ct_gather_frags);
EXPORT_SYMBOL(ip_conntrack_htable_size);
......@@ -314,6 +314,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
* do_extra_mangle last time. */
*other_ipp = saved_ip;
#ifdef CONFIG_IP_NF_NAT_LOCAL
if (hooknum == NF_IP_LOCAL_OUT
&& *var_ipp != orig_dstip
&& !do_extra_mangle(*var_ipp, other_ipp)) {
......@@ -324,6 +325,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
* anyway. */
continue;
}
#endif
/* Count how many others map onto this. */
score = count_maps(tuple->src.ip, tuple->dst.ip,
......@@ -367,11 +369,13 @@ find_best_ips_proto_fast(struct ip_conntrack_tuple *tuple,
else {
/* Only do extra mangle when required (breaks
socket binding) */
#ifdef CONFIG_IP_NF_NAT_LOCAL
if (tuple->dst.ip != mr->range[0].min_ip
&& hooknum == NF_IP_LOCAL_OUT
&& !do_extra_mangle(mr->range[0].min_ip,
&tuple->src.ip))
return NULL;
#endif
tuple->dst.ip = mr->range[0].min_ip;
}
}
......@@ -494,7 +498,10 @@ helper_cmp(const struct ip_nat_helper *helper,
static unsigned int opposite_hook[NF_IP_NUMHOOKS]
= { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING,
[NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING,
#ifdef CONFIG_IP_NF_NAT_LOCAL
[NF_IP_LOCAL_OUT] = NF_IP_LOCAL_IN,
[NF_IP_LOCAL_IN] = NF_IP_LOCAL_OUT,
#endif
};
unsigned int
......
......@@ -140,8 +140,12 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
#ifdef CONFIG_IP_NF_NAT_LOCAL
IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
|| hooknum == NF_IP_LOCAL_OUT);
#else
IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING);
#endif
ct = ip_conntrack_get(*pskb, &ctinfo);
......@@ -210,7 +214,7 @@ static int ipt_dnat_checkentry(const char *tablename,
/* Only allow these for NAT. */
if (strcmp(tablename, "nat") != 0) {
DEBUGP("SNAT: wrong table %s\n", tablename);
DEBUGP("DNAT: wrong table %s\n", tablename);
return 0;
}
......@@ -218,6 +222,14 @@ static int ipt_dnat_checkentry(const char *tablename,
DEBUGP("DNAT: hook mask 0x%x bad\n", hook_mask);
return 0;
}
#ifndef CONFIG_IP_NF_NAT_LOCAL
if (hook_mask & (1 << NF_IP_LOCAL_OUT)) {
DEBUGP("DNAT: CONFIG_IP_NF_NAT_LOCAL not enabled\n");
return 0;
}
#endif
return 1;
}
......
......@@ -42,6 +42,7 @@
#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \
: ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \
: ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" \
: ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
: "*ERROR*")))
static unsigned int
......@@ -95,6 +96,12 @@ ip_nat_fn(unsigned int hooknum,
}
/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
case IP_CT_NEW:
#ifdef CONFIG_IP_NF_NAT_LOCAL
/* LOCAL_IN hook doesn't have a chain and thus doesn't care
* about new packets -HW */
if (hooknum == NF_IP_LOCAL_IN)
return NF_ACCEPT;
#endif
info = &ct->nat.info;
WRITE_LOCK(&ip_nat_lock);
......@@ -205,6 +212,11 @@ static struct nf_hook_ops ip_nat_out_ops
static struct nf_hook_ops ip_nat_local_out_ops
= { { NULL, NULL }, ip_nat_local_fn, PF_INET, NF_IP_LOCAL_OUT, NF_IP_PRI_NAT_DST };
#ifdef CONFIG_IP_NF_NAT_LOCAL
static struct nf_hook_ops ip_nat_local_in_ops
= { { NULL, NULL }, ip_nat_fn, PF_INET, NF_IP_LOCAL_IN, NF_IP_PRI_NAT_SRC };
#endif
/* Protocol registration. */
int ip_nat_protocol_register(struct ip_nat_protocol *proto)
{
......@@ -273,6 +285,13 @@ static int init_or_cleanup(int init)
printk("ip_nat_init: can't register local out hook.\n");
goto cleanup_outops;
}
#ifdef CONFIG_IP_NF_NAT_LOCAL
ret = nf_register_hook(&ip_nat_local_in_ops);
if (ret < 0) {
printk("ip_nat_init: can't register local in hook.\n");
goto cleanup_localoutops;
}
#endif
if (ip_conntrack_module)
__MOD_INC_USE_COUNT(ip_conntrack_module);
return ret;
......@@ -280,6 +299,10 @@ static int init_or_cleanup(int init)
cleanup:
if (ip_conntrack_module)
__MOD_DEC_USE_COUNT(ip_conntrack_module);
#ifdef CONFIG_IP_NF_NAT_LOCAL
nf_unregister_hook(&ip_nat_local_in_ops);
cleanup_localoutops:
#endif
nf_unregister_hook(&ip_nat_local_out_ops);
cleanup_outops:
nf_unregister_hook(&ip_nat_out_ops);
......
......@@ -490,6 +490,7 @@ EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(skb_clone);
EXPORT_SYMBOL(skb_copy);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_get);
......