Commit af9b4549 authored by David S. Miller

Merge nuts.davemloft.net:/disk1/BK/nf-2.6.9

into nuts.davemloft.net:/disk1/BK/net-2.6
parents 0b8cf496 b677b829
......@@ -51,10 +51,12 @@ enum ip_conntrack_status {
#include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
#include <linux/netfilter_ipv4/ip_conntrack_icmp.h>
#include <linux/netfilter_ipv4/ip_conntrack_sctp.h>
/* per conntrack: protocol private data */
union ip_conntrack_proto {
/* insert conntrack proto private data here */
struct ip_ct_sctp sctp;
struct ip_ct_tcp tcp;
struct ip_ct_icmp icmp;
};
......@@ -156,6 +158,12 @@ struct ip_conntrack_expect
union ip_conntrack_expect_help help;
};
/* Traffic totals for one direction of a tracked connection.  struct
 * ip_conntrack embeds an array of these (one per direction) when
 * CONFIG_IP_NF_CT_ACCT is enabled. */
struct ip_conntrack_counter
{
u_int64_t packets;	/* packets accounted in this direction */
u_int64_t bytes;	/* sum of the IP total-length field of those packets */
};
struct ip_conntrack_helper;
struct ip_conntrack
......@@ -173,6 +181,11 @@ struct ip_conntrack
/* Timer function; drops refcnt when it goes off. */
struct timer_list timeout;
#ifdef CONFIG_IP_NF_CT_ACCT
/* Accounting Information (same cache line as other written members) */
struct ip_conntrack_counter counters[IP_CT_DIR_MAX];
#endif
/* If we're expecting another related connection, this will be
in expected linked list */
struct list_head sibling_list;
......@@ -245,10 +258,17 @@ extern int invert_tuplepr(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_tuple *orig);
/* Refresh conntrack for this many jiffies */
extern void ip_ct_refresh(struct ip_conntrack *ct,
unsigned long extra_jiffies);
extern void ip_ct_refresh_acct(struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb,
unsigned long extra_jiffies);
/* These are for NAT. Icky. */
/* Update TCP window tracking data when NAT mangles the packet */
extern int ip_conntrack_tcp_update(struct sk_buff *skb,
struct ip_conntrack *conntrack,
int dir);
/* Call me when a conntrack is destroyed. */
extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
......@@ -271,6 +291,26 @@ static inline int is_confirmed(struct ip_conntrack *ct)
}
extern unsigned int ip_conntrack_htable_size;
/* Connection tracking event counters.  The conntrack core keeps one
 * instance per CPU and bumps the fields without further locking. */
struct ip_conntrack_stat
{
unsigned int searched;		/* hash chain entries walked past without a match */
unsigned int found;		/* hash lookups that returned a matching entry */
unsigned int new;		/* conntracks created that did not match an expectation */
unsigned int invalid;		/* packets with no usable tuple, or rejected by protocol/helper */
unsigned int ignore;		/* packets that already carried conntrack info on entry */
unsigned int delete;		/* conntrack entries returned to the slab */
unsigned int delete_list;	/* timeout-handler invocations that unlink an entry */
unsigned int insert;		/* entries confirmed into the hash table */
unsigned int insert_failed;	/* confirmations refused (packet dropped) */
unsigned int drop;		/* packets dropped because tracking returned an error pointer */
unsigned int early_drop;	/* entries evicted by early_drop() */
unsigned int icmp_error;	/* packets settled by the protocol's error() hook */
unsigned int expect_new;	/* new conntracks that matched an expectation */
unsigned int expect_create;	/* expectations inserted */
unsigned int expect_delete;	/* expectations freed */
};
/* eg. PROVIDES_CONNTRACK(ftp); */
#define PROVIDES_CONNTRACK(name) \
......
......@@ -21,15 +21,17 @@ extern struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol);
extern struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol);
extern struct list_head protocol_list;
/* Returns conntrack if it dealt with ICMP, and filled in skb->nfct */
extern struct ip_conntrack *icmp_error_track(struct sk_buff *skb,
enum ip_conntrack_info *ctinfo,
unsigned int hooknum);
extern int get_tuple(const struct iphdr *iph,
const struct sk_buff *skb,
unsigned int dataoff,
struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_protocol *protocol);
extern int
ip_ct_get_tuple(const struct iphdr *iph,
const struct sk_buff *skb,
unsigned int dataoff,
struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_protocol *protocol);
extern int
ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_tuple *orig,
const struct ip_conntrack_protocol *protocol);
/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
......
......@@ -3,6 +3,11 @@
#define _IP_CONNTRACK_PROTOCOL_H
#include <linux/netfilter_ipv4/ip_conntrack.h>
/* length of buffer to which print_tuple/print_conntrack members are
* writing */
#define IP_CT_PRINT_BUFLEN 100
struct ip_conntrack_protocol
{
/* Next pointer. */
......@@ -50,6 +55,9 @@ struct ip_conntrack_protocol
int (*exp_matches_pkt)(struct ip_conntrack_expect *exp,
const struct sk_buff *skb);
int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
unsigned int hooknum);
/* Module (if any) which this is connected to. */
struct module *me;
};
......@@ -63,4 +71,17 @@ extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
extern struct ip_conntrack_protocol ip_conntrack_protocol_icmp;
extern int ip_conntrack_protocol_tcp_init(void);
/* Log invalid packets */
extern unsigned int ip_ct_log_invalid;
/*
 * LOG_INVALID(proto): non-zero when an invalid packet of protocol `proto`
 * should be logged, i.e. when ip_ct_log_invalid equals `proto` or equals
 * IPPROTO_RAW (which therefore selects every protocol).
 *
 * With DEBUG_INVALID_PACKETS defined the answer depends on the setting
 * alone; otherwise it is additionally gated by net_ratelimit(), which is
 * only consulted once the protocol test has passed (short-circuit &&).
 */
#ifdef DEBUG_INVALID_PACKETS
#define LOG_INVALID(proto) \
(ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW)
#else
#define LOG_INVALID(proto) \
((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \
&& net_ratelimit())
#endif
#endif /*_IP_CONNTRACK_PROTOCOL_H*/
#ifndef _IP_CONNTRACK_SCTP_H
#define _IP_CONNTRACK_SCTP_H
/* SCTP tracking. */
/*
 * Tracked state of an SCTP association.  The numbering is spelled out so
 * that it is visible at a glance; SCTP_CONNTRACK_MAX is a sentinel equal
 * to the number of real states listed before it.
 */
enum sctp_conntrack {
	SCTP_CONNTRACK_NONE              = 0,
	SCTP_CONNTRACK_CLOSED            = 1,
	SCTP_CONNTRACK_COOKIE_WAIT       = 2,
	SCTP_CONNTRACK_COOKIE_ECHOED     = 3,
	SCTP_CONNTRACK_ESTABLISHED       = 4,
	SCTP_CONNTRACK_SHUTDOWN_SENT     = 5,
	SCTP_CONNTRACK_SHUTDOWN_RECD     = 6,
	SCTP_CONNTRACK_SHUTDOWN_ACK_SENT = 7,
	SCTP_CONNTRACK_MAX               = 8
};
/* Per-connection private data kept by the SCTP protocol tracker; stored in
 * union ip_conntrack_proto alongside the TCP and ICMP variants. */
struct ip_ct_sctp
{
enum sctp_conntrack state;	/* current state of the association */
u_int32_t vtag[IP_CT_DIR_MAX];	/* one tag per direction; presumably the SCTP verification tag - TODO confirm */
u_int32_t ttag[IP_CT_DIR_MAX];	/* one tag per direction; NOTE(review): meaning not visible here - confirm against the tracker */
};
#endif /* _IP_CONNTRACK_SCTP_H */
......@@ -4,25 +4,44 @@
enum tcp_conntrack {
TCP_CONNTRACK_NONE,
TCP_CONNTRACK_ESTABLISHED,
TCP_CONNTRACK_SYN_SENT,
TCP_CONNTRACK_SYN_RECV,
TCP_CONNTRACK_ESTABLISHED,
TCP_CONNTRACK_FIN_WAIT,
TCP_CONNTRACK_TIME_WAIT,
TCP_CONNTRACK_CLOSE,
TCP_CONNTRACK_CLOSE_WAIT,
TCP_CONNTRACK_LAST_ACK,
TCP_CONNTRACK_TIME_WAIT,
TCP_CONNTRACK_CLOSE,
TCP_CONNTRACK_LISTEN,
TCP_CONNTRACK_MAX
TCP_CONNTRACK_MAX,
TCP_CONNTRACK_IGNORE
};
/* Window scaling is advertised by the sender */
#define IP_CT_TCP_STATE_FLAG_WINDOW_SCALE 0x01
/* SACK is permitted by the sender */
#define IP_CT_TCP_FLAG_SACK_PERM 0x02
/* Window-tracking parameters for one direction of a TCP connection;
 * struct ip_ct_tcp holds two of these in its seen[] array. */
struct ip_ct_tcp_state {
u_int32_t td_end; /* max of seq + len */
u_int32_t td_maxend; /* max of ack + max(win, 1) */
u_int32_t td_maxwin; /* max(win) */
u_int8_t td_scale; /* window scale factor */
u_int8_t loose; /* used when connection picked up from the middle */
u_int8_t flags; /* per direction state flags */
};
struct ip_ct_tcp
{
enum tcp_conntrack state;
/* Poor man's window tracking: sequence number of valid ACK
handshake completion packet */
u_int32_t handshake_ack;
struct ip_ct_tcp_state seen[2]; /* connection parameters per direction */
u_int8_t state; /* state of the connection (enum tcp_conntrack) */
/* For detecting stale connections */
u_int8_t last_dir; /* Direction of the last packet (enum ip_conntrack_dir) */
u_int8_t retrans; /* Number of retransmitted packets */
u_int8_t last_index; /* Index of the last packet */
u_int32_t last_seq; /* Last sequence number seen in dir */
u_int32_t last_end; /* Last seq + len */
};
#endif /* _IP_CONNTRACK_TCP_H */
......@@ -25,6 +25,9 @@ union ip_conntrack_manip_proto
struct {
u_int16_t id;
} icmp;
struct {
u_int16_t port;
} sctp;
};
/* The manipulable part of the tuple. */
......@@ -55,6 +58,9 @@ struct ip_conntrack_tuple
struct {
u_int8_t type, code;
} icmp;
struct {
u_int16_t port;
} sctp;
} u;
/* The protocol. */
......
......@@ -38,11 +38,12 @@ struct ip_nat_helper
struct ip_nat_info *info);
};
extern struct list_head helpers;
extern int ip_nat_helper_register(struct ip_nat_helper *me);
extern void ip_nat_helper_unregister(struct ip_nat_helper *me);
extern struct ip_nat_helper *
ip_nat_find_helper(const struct ip_conntrack_tuple *tuple);
/* These return true or false. */
extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb,
struct ip_conntrack *ct,
......
......@@ -336,7 +336,6 @@ ipt_get_target(struct ipt_entry *e)
* Main firewall chains definitions and global var's definitions.
*/
#ifdef __KERNEL__
static DECLARE_MUTEX(ipt_mutex);
#include <linux/init.h>
extern void ipt_init(void) __init;
......
#ifndef _IPT_SCTP_H_
#define _IPT_SCTP_H_

/*
 * Selector bits; presumably stored in ipt_sctp_info.flags / .invflags to
 * say which criteria a rule uses (and which are inverted) - TODO confirm
 * against the match implementation.
 */
#define IPT_SCTP_SRC_PORTS	    0x01
#define IPT_SCTP_DEST_PORTS	    0x02
#define IPT_SCTP_CHUNK_TYPES	    0x04

/* OR of every selector bit defined above. */
#define IPT_SCTP_VALID_FLAGS		0x07

/* Number of elements in a true array (must not be used on a pointer). */
#define ELEMCOUNT(x) (sizeof(x) / sizeof((x)[0]))

/*
 * One per-chunk-type flag test.  Presumably a chunk of type `chunktype`
 * matches when (chunk flags & flag_mask) == flag - TODO confirm.
 */
struct ipt_sctp_flag_info {
	u_int8_t chunktype;
	u_int8_t flag;
	u_int8_t flag_mask;
};

#define IPT_NUM_SCTP_FLAGS	4

/* Match parameters for one SCTP rule. */
struct ipt_sctp_info {
	u_int16_t dpts[2];  /* Min, Max */
	u_int16_t spts[2];  /* Min, Max */

	/*
	 * Bit mask of chunks to be matched according to RFC 2960.
	 * NOTE(review): 256 / sizeof(u_int32_t) is 64 words (2048 bits),
	 * although 256 chunk types need only 8.  The size is left alone
	 * because it determines the layout of this structure.
	 */
	u_int32_t chunkmap[256 / sizeof (u_int32_t)];

#define SCTP_CHUNK_MATCH_ANY   0x01  /* Match if any of the chunk types are present */
#define SCTP_CHUNK_MATCH_ALL   0x02  /* Match if all of the chunk types are present */
#define SCTP_CHUNK_MATCH_ONLY  0x04  /* Match if these are the only chunk types present */

	u_int32_t chunk_match_type;	/* one of SCTP_CHUNK_MATCH_* */
	struct ipt_sctp_flag_info flag_info[IPT_NUM_SCTP_FLAGS];
	int flag_count;			/* presumably the number of flag_info[] entries in use */

	u_int32_t flags;
	u_int32_t invflags;
};

/*
 * Despite its name this yields the number of BITS in `type`; it is used to
 * split a chunk type into a word index and a bit position.
 */
#define bytes(type) (sizeof(type) * 8)

/*
 * Chunk-map helpers.
 *
 * - `chunkmap` arguments must be real arrays of u_int32_t (ELEMCOUNT is
 *   applied to them), not pointers.
 * - Every argument is parenthesised and `type` is evaluated exactly once.
 * - Internal temporaries carry a cm_..._ name so they cannot capture a
 *   caller variable (such as `i`) that appears in an argument expression.
 * - Bit masks are built from 1U so that bit 31 is not a signed overflow.
 */

/* Set the bit for chunk type `type`. */
#define SCTP_CHUNKMAP_SET(chunkmap, type) \
	do { \
		unsigned int cm_type_ = (type); \
		(chunkmap)[cm_type_ / bytes(u_int32_t)] |= \
			1U << (cm_type_ % bytes(u_int32_t)); \
	} while (0)

/* Clear the bit for chunk type `type`. */
#define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \
	do { \
		unsigned int cm_type_ = (type); \
		(chunkmap)[cm_type_ / bytes(u_int32_t)] &= \
			~(1U << (cm_type_ % bytes(u_int32_t))); \
	} while (0)

/* Evaluates to 1 if the bit for chunk type `type` is set, else 0. */
#define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \
({ \
	unsigned int cm_type_ = (type); \
	((chunkmap)[cm_type_ / bytes(u_int32_t)] & \
	 (1U << (cm_type_ % bytes(u_int32_t)))) ? 1 : 0; \
})

/* Clear every bit of the map. */
#define SCTP_CHUNKMAP_RESET(chunkmap) \
	do { \
		unsigned int cm_idx_; \
		for (cm_idx_ = 0; cm_idx_ < ELEMCOUNT(chunkmap); cm_idx_++) \
			(chunkmap)[cm_idx_] = 0; \
	} while (0)

/* Set every bit of the map. */
#define SCTP_CHUNKMAP_SET_ALL(chunkmap) \
	do { \
		unsigned int cm_idx_; \
		for (cm_idx_ = 0; cm_idx_ < ELEMCOUNT(chunkmap); cm_idx_++) \
			(chunkmap)[cm_idx_] = ~0U; \
	} while (0)

/*
 * Copy srcmap into destmap.  The element count is taken from srcmap; the
 * previous version sized the loop from an identifier named `chunkmap` that
 * is not a parameter of this macro.
 */
#define SCTP_CHUNKMAP_COPY(destmap, srcmap) \
	do { \
		unsigned int cm_idx_; \
		for (cm_idx_ = 0; cm_idx_ < ELEMCOUNT(srcmap); cm_idx_++) \
			(destmap)[cm_idx_] = (srcmap)[cm_idx_]; \
	} while (0)

/* Evaluates to 1 if no bit of the map is set, else 0. */
#define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \
({ \
	unsigned int cm_idx_; \
	int cm_clear_ = 1; \
	for (cm_idx_ = 0; cm_idx_ < ELEMCOUNT(chunkmap); cm_idx_++) { \
		if ((chunkmap)[cm_idx_]) { \
			cm_clear_ = 0; \
			break; \
		} \
	} \
	cm_clear_; \
})

/* Evaluates to 1 if every bit of the map is set, else 0. */
#define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \
({ \
	unsigned int cm_idx_; \
	int cm_full_ = 1; \
	for (cm_idx_ = 0; cm_idx_ < ELEMCOUNT(chunkmap); cm_idx_++) { \
		if ((chunkmap)[cm_idx_] != ~0U) { \
			cm_full_ = 0; \
			break; \
		} \
	} \
	cm_full_; \
})

#endif /* _IPT_SCTP_H_ */
......@@ -107,10 +107,6 @@ struct ip6t_counters
u_int64_t pcnt, bcnt; /* Packet and byte counters */
};
#ifdef __KERNEL__
static DECLARE_MUTEX(ip6t_mutex);
#endif
/* Values for "flag" field in struct ip6t_ip6 (general ip6 structure). */
#define IP6T_F_PROTO 0x01 /* Set if rule cares about upper
protocols */
......
......@@ -410,6 +410,18 @@ enum
NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT=12,
NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT=13,
NET_IPV4_NF_CONNTRACK_BUCKETS=14,
NET_IPV4_NF_CONNTRACK_LOG_INVALID=15,
NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16,
NET_IPV4_NF_CONNTRACK_TCP_LOOSE=17,
NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL=18,
NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS=19,
NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20,
NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21,
NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22,
NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23,
NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24,
NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25,
NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26,
};
/* /proc/sys/net/ipv6 */
......
......@@ -628,5 +628,17 @@ config IP_NF_MATCH_REALM
If you want to compile it as a module, say M here and read
Documentation/modules.txt. If unsure, say `N'.
config IP_NF_CT_ACCT
bool "Connection tracking flow accounting"
depends on IP_NF_CONNTRACK
config IP_NF_MATCH_SCTP
tristate 'SCTP protocol match support'
depends on IP_NF_IPTABLES
config IP_NF_CT_PROTO_SCTP
tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
depends on IP_NF_CONNTRACK && EXPERIMENTAL
endmenu
......@@ -19,6 +19,9 @@ ipchains-objs := $(ip_nf_compat-objs) ipchains_core.o
# connection tracking
obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
# SCTP protocol connection tracking
obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
# connection tracking helpers
obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
......@@ -43,6 +46,7 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
# matches
obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o
obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
......
......@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
#include <linux/moduleparam.h>
#include <net/checksum.h>
#include <net/udp.h>
......@@ -34,7 +35,7 @@ static unsigned int master_timeout = 300;
MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
MODULE_DESCRIPTION("Amanda connection tracking module");
MODULE_LICENSE("GPL");
MODULE_PARM(master_timeout, "i");
module_param(master_timeout, int, 0600);
MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
static char *conns[] = { "DATA ", "MESG ", "INDEX " };
......@@ -58,7 +59,7 @@ static int help(struct sk_buff *skb,
/* increase the UDP timeout of the master connection as replies from
* Amanda clients to the server can be quite delayed */
ip_ct_refresh(ct, master_timeout * HZ);
ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ);
/* No data? */
dataoff = skb->nh.iph->ihl*4 + sizeof(struct udphdr);
......
......@@ -34,8 +34,9 @@
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
/* For ERR_PTR(). Yeah, I know... --RR */
#include <linux/fs.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
/* This rwlock protects the main hash table, protocol/helper/expected
registrations, conntrack timers*/
......@@ -58,6 +59,7 @@
DECLARE_RWLOCK(ip_conntrack_lock);
DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
......@@ -65,11 +67,13 @@ LIST_HEAD(protocol_list);
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
int ip_conntrack_max;
static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
struct list_head *ip_conntrack_hash;
static kmem_cache_t *ip_conntrack_cachep;
static kmem_cache_t *ip_conntrack_expect_cachep;
struct ip_conntrack ip_conntrack_untracked;
DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
......@@ -127,11 +131,11 @@ hash_conntrack(const struct ip_conntrack_tuple *tuple)
}
int
get_tuple(const struct iphdr *iph,
const struct sk_buff *skb,
unsigned int dataoff,
struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_protocol *protocol)
ip_ct_get_tuple(const struct iphdr *iph,
const struct sk_buff *skb,
unsigned int dataoff,
struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_protocol *protocol)
{
/* Never happen */
if (iph->frag_off & htons(IP_OFFSET)) {
......@@ -147,10 +151,10 @@ get_tuple(const struct iphdr *iph,
return protocol->pkt_to_tuple(skb, dataoff, tuple);
}
static int
invert_tuple(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_tuple *orig,
const struct ip_conntrack_protocol *protocol)
int
ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_tuple *orig,
const struct ip_conntrack_protocol *protocol)
{
inverse->src.ip = orig->dst.ip;
inverse->dst.ip = orig->src.ip;
......@@ -177,7 +181,8 @@ destroy_expect(struct ip_conntrack_expect *exp)
IP_NF_ASSERT(atomic_read(&exp->use) == 0);
IP_NF_ASSERT(!timer_pending(&exp->timeout));
kfree(exp);
kmem_cache_free(ip_conntrack_expect_cachep, exp);
__get_cpu_var(ip_conntrack_stat).expect_delete++;
}
inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
......@@ -336,7 +341,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
list_del(&ct->master->expected_list);
master = ct->master->expectant;
}
kfree(ct->master);
kmem_cache_free(ip_conntrack_expect_cachep, ct->master);
}
WRITE_UNLOCK(&ip_conntrack_lock);
......@@ -346,12 +351,15 @@ destroy_conntrack(struct nf_conntrack *nfct)
DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
kmem_cache_free(ip_conntrack_cachep, ct);
atomic_dec(&ip_conntrack_count);
__get_cpu_var(ip_conntrack_stat).delete++;
}
static void death_by_timeout(unsigned long ul_conntrack)
{
struct ip_conntrack *ct = (void *)ul_conntrack;
__get_cpu_var(ip_conntrack_stat).delete_list++;
WRITE_LOCK(&ip_conntrack_lock);
clean_from_lists(ct);
WRITE_UNLOCK(&ip_conntrack_lock);
......@@ -374,13 +382,19 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
{
struct ip_conntrack_tuple_hash *h;
unsigned int hash = hash_conntrack(tuple);
/* use per_cpu() to avoid multiple calls to smp_processor_id() */
unsigned int cpu = smp_processor_id();
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
h = LIST_FIND(&ip_conntrack_hash[hash],
conntrack_tuple_cmp,
struct ip_conntrack_tuple_hash *,
tuple, ignored_conntrack);
return h;
list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
per_cpu(ip_conntrack_stat, cpu).found++;
return h;
}
per_cpu(ip_conntrack_stat, cpu).searched++;
}
return NULL;
}
/* Find a connection corresponding to a tuple. */
......@@ -474,10 +488,12 @@ __ip_conntrack_confirm(struct nf_ct_info *nfct)
atomic_inc(&ct->ct_general.use);
set_bit(IPS_CONFIRMED_BIT, &ct->status);
WRITE_UNLOCK(&ip_conntrack_lock);
__get_cpu_var(ip_conntrack_stat).insert++;
return NF_ACCEPT;
}
WRITE_UNLOCK(&ip_conntrack_lock);
__get_cpu_var(ip_conntrack_stat).insert_failed++;
return NF_DROP;
}
......@@ -496,83 +512,6 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
return h != NULL;
}
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
struct ip_conntrack *
icmp_error_track(struct sk_buff *skb,
enum ip_conntrack_info *ctinfo,
unsigned int hooknum)
{
struct ip_conntrack_tuple innertuple, origtuple;
struct {
struct icmphdr icmp;
struct iphdr ip;
} inside;
struct ip_conntrack_protocol *innerproto;
struct ip_conntrack_tuple_hash *h;
int dataoff;
IP_NF_ASSERT(skb->nfct == NULL);
/* Not enough header? */
if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
return NULL;
if (inside.icmp.type != ICMP_DEST_UNREACH
&& inside.icmp.type != ICMP_SOURCE_QUENCH
&& inside.icmp.type != ICMP_TIME_EXCEEDED
&& inside.icmp.type != ICMP_PARAMETERPROB
&& inside.icmp.type != ICMP_REDIRECT)
return NULL;
/* Ignore ICMP's containing fragments (shouldn't happen) */
if (inside.ip.frag_off & htons(IP_OFFSET)) {
DEBUGP("icmp_error_track: fragment of proto %u\n",
inside.ip.protocol);
return NULL;
}
innerproto = ip_ct_find_proto(inside.ip.protocol);
dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
/* Are they talking about one of our connections? */
if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
return NULL;
}
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
DEBUGP("icmp_error_track: Can't invert tuple\n");
return NULL;
}
*ctinfo = IP_CT_RELATED;
h = ip_conntrack_find_get(&innertuple, NULL);
if (!h) {
/* Locally generated ICMPs will match inverted if they
haven't been SNAT'ed yet */
/* FIXME: NAT code has to handle half-done double NAT --RR */
if (hooknum == NF_IP_LOCAL_OUT)
h = ip_conntrack_find_get(&origtuple, NULL);
if (!h) {
DEBUGP("icmp_error_track: no match\n");
return NULL;
}
/* Reverse direction from that found */
if (DIRECTION(h) != IP_CT_DIR_REPLY)
*ctinfo += IP_CT_IS_REPLY;
} else {
if (DIRECTION(h) == IP_CT_DIR_REPLY)
*ctinfo += IP_CT_IS_REPLY;
}
/* Update skb to refer to this connection */
skb->nfct = &h->ctrack->infos[*ctinfo];
return h->ctrack;
}
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
......@@ -598,6 +537,7 @@ static int early_drop(struct list_head *chain)
if (del_timer(&h->ctrack->timeout)) {
death_by_timeout((unsigned long)h->ctrack);
dropped = 1;
__get_cpu_var(ip_conntrack_stat).early_drop++;
}
ip_conntrack_put(h->ctrack);
return dropped;
......@@ -654,7 +594,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
}
}
if (!invert_tuple(&repl_tuple, tuple, protocol)) {
if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
DEBUGP("Can't invert tuple.\n");
return NULL;
}
......@@ -693,41 +633,53 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
struct ip_conntrack_expect *, tuple);
READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
/* If master is not in hash table yet (ie. packet hasn't left
this machine yet), how can other end know about expected?
Hence these are not the droids you are looking for (if
master ct never got confirmed, we'd hold a reference to it
and weird things would happen to future packets). */
if (expected && !is_confirmed(expected->expectant))
expected = NULL;
/* Look up the conntrack helper for master connections only */
if (!expected)
conntrack->helper = ip_ct_find_helper(&repl_tuple);
if (expected) {
/* If master is not in hash table yet (ie. packet hasn't left
this machine yet), how can other end know about expected?
Hence these are not the droids you are looking for (if
master ct never got confirmed, we'd hold a reference to it
and weird things would happen to future packets). */
if (!is_confirmed(expected->expectant)) {
conntrack->helper = ip_ct_find_helper(&repl_tuple);
goto end;
}
/* If the expectation is dying, then this is a loser. */
if (expected
&& expected->expectant->helper->timeout
&& ! del_timer(&expected->timeout))
expected = NULL;
/* Expectation is dying... */
if (expected->expectant->helper->timeout
&& !del_timer(&expected->timeout))
goto end;
if (expected) {
DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
conntrack, expected);
/* Welcome, Mr. Bond. We've been expecting you... */
IP_NF_ASSERT(master_ct(conntrack));
__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
conntrack->master = expected;
expected->sibling = conntrack;
LIST_DELETE(&ip_conntrack_expect_list, expected);
expected->expectant->expecting--;
nf_conntrack_get(&master_ct(conntrack)->infos[0]);
/* this is a braindead... --pablo */
atomic_inc(&ip_conntrack_count);
WRITE_UNLOCK(&ip_conntrack_lock);
if (expected->expectfn)
expected->expectfn(conntrack);
__get_cpu_var(ip_conntrack_stat).expect_new++;
goto ret;
} else {
conntrack->helper = ip_ct_find_helper(&repl_tuple);
__get_cpu_var(ip_conntrack_stat).new++;
}
atomic_inc(&ip_conntrack_count);
end: atomic_inc(&ip_conntrack_count);
WRITE_UNLOCK(&ip_conntrack_lock);
if (expected && expected->expectfn)
expected->expectfn(conntrack);
return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
ret: return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
......@@ -743,7 +695,8 @@ resolve_normal_ct(struct sk_buff *skb,
IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto))
if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
&tuple,proto))
return NULL;
/* look for tuple match */
......@@ -823,38 +776,51 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
#endif
/* Previously seen (loopback or untracked)? Ignore. */
if ((*pskb)->nfct)
if ((*pskb)->nfct) {
__get_cpu_var(ip_conntrack_stat).ignore++;
return NF_ACCEPT;
}
proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
/* It may be an icmp error... */
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
&& icmp_error_track(*pskb, &ctinfo, hooknum))
return NF_ACCEPT;
/* It may be a special packet, an error, unclean...
* The inverse of the return code tells the netfilter
* core what to do with the packet. */
if (proto->error != NULL
&& (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
__get_cpu_var(ip_conntrack_stat).icmp_error++;
return -ret;
}
if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo)))
if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
/* Not valid part of a connection */
__get_cpu_var(ip_conntrack_stat).invalid++;
return NF_ACCEPT;
}
if (IS_ERR(ct))
if (IS_ERR(ct)) {
/* Too stressed to deal. */
__get_cpu_var(ip_conntrack_stat).drop++;
return NF_DROP;
}
IP_NF_ASSERT((*pskb)->nfct);
ret = proto->packet(ct, *pskb, ctinfo);
if (ret == -1) {
/* Invalid */
if (ret < 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do*/
nf_conntrack_put((*pskb)->nfct);
(*pskb)->nfct = NULL;
return NF_ACCEPT;
__get_cpu_var(ip_conntrack_stat).invalid++;
return -ret;
}
if (ret != NF_DROP && ct->helper) {
ret = ct->helper->help(*pskb, ct, ctinfo);
if (ret == -1) {
/* Invalid */
__get_cpu_var(ip_conntrack_stat).invalid++;
nf_conntrack_put((*pskb)->nfct);
(*pskb)->nfct = NULL;
return NF_ACCEPT;
......@@ -869,7 +835,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_tuple *orig)
{
return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
return ip_ct_invert_tuple(inverse, orig,
ip_ct_find_proto(orig->dst.protonum));
}
static inline int resent_expect(const struct ip_conntrack_expect *i,
......@@ -923,9 +890,8 @@ struct ip_conntrack_expect *
ip_conntrack_expect_alloc(void)
{
struct ip_conntrack_expect *new;
new = (struct ip_conntrack_expect *)
kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
if (!new) {
DEBUGP("expect_related: OOM allocating expect\n");
return NULL;
......@@ -933,6 +899,7 @@ ip_conntrack_expect_alloc(void)
/* tuple_cmp compares the whole union, so we have to initialize it cleanly */
memset(new, 0, sizeof(struct ip_conntrack_expect));
atomic_set(&new->use, 1);
return new;
}
......@@ -944,7 +911,6 @@ ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
new->expectant = related_to;
new->sibling = NULL;
atomic_set(&new->use, 1);
/* add to expected list for this connection */
list_add_tail(&new->expected_list, &related_to->sibling_list);
......@@ -997,7 +963,8 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
}
WRITE_UNLOCK(&ip_conntrack_lock);
kfree(expect);
/* This expectation is not inserted so no need to lock */
kmem_cache_free(ip_conntrack_expect_cachep, expect);
return -EEXIST;
} else if (related_to->helper->max_expected &&
......@@ -1015,7 +982,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
related_to->helper->name,
NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
kfree(expect);
kmem_cache_free(ip_conntrack_expect_cachep, expect);
return -EPERM;
}
DEBUGP("ip_conntrack: max number of expected "
......@@ -1049,7 +1016,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
WRITE_UNLOCK(&ip_conntrack_lock);
DEBUGP("expect_related: busy!\n");
kfree(expect);
kmem_cache_free(ip_conntrack_expect_cachep, expect);
return -EBUSY;
}
......@@ -1057,6 +1024,8 @@ out: ip_conntrack_expect_insert(expect, related_to);
WRITE_UNLOCK(&ip_conntrack_lock);
__get_cpu_var(ip_conntrack_stat).expect_create++;
return ret;
}
......@@ -1164,21 +1133,39 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
synchronize_net();
}
/* Refresh conntrack for this many jiffies. */
void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
/*
 * Account one packet against a conntrack entry.
 *
 * With CONFIG_IP_NF_CT_ACCT enabled and a non-NULL skb, bumps the packet
 * counter of the direction derived from ctinfo and adds the packet's IP
 * total length (converted from network byte order) to the byte counter.
 * A NULL skb means there is nothing to account.  Without
 * CONFIG_IP_NF_CT_ACCT the function body is empty.
 */
static inline void ct_add_counters(struct ip_conntrack *ct,
				   enum ip_conntrack_info ctinfo,
				   const struct sk_buff *skb)
{
#ifdef CONFIG_IP_NF_CT_ACCT
	struct ip_conntrack_counter *acct;

	if (!skb)
		return;

	acct = &ct->counters[CTINFO2DIR(ctinfo)];
	acct->packets += 1;
	acct->bytes += ntohs(skb->nh.iph->tot_len);
#endif
}
/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
void ip_ct_refresh_acct(struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb,
unsigned long extra_jiffies)
{
IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
/* If not in hash table, timer will not be active yet */
if (!is_confirmed(ct))
if (!is_confirmed(ct)) {
ct->timeout.expires = extra_jiffies;
else {
ct_add_counters(ct, ctinfo, skb);
} else {
WRITE_LOCK(&ip_conntrack_lock);
/* Need del_timer for race avoidance (may already be dying). */
if (del_timer(&ct->timeout)) {
ct->timeout.expires = jiffies + extra_jiffies;
add_timer(&ct->timeout);
}
ct_add_counters(ct, ctinfo, skb);
WRITE_UNLOCK(&ip_conntrack_lock);
}
}
......@@ -1368,12 +1355,13 @@ void ip_conntrack_cleanup(void)
}
kmem_cache_destroy(ip_conntrack_cachep);
kmem_cache_destroy(ip_conntrack_expect_cachep);
vfree(ip_conntrack_hash);
nf_unregister_sockopt(&so_getorigdst);
}
static int hashsize;
MODULE_PARM(hashsize, "i");
module_param(hashsize, int, 0400);
int __init ip_conntrack_init(void)
{
......@@ -1420,6 +1408,15 @@ int __init ip_conntrack_init(void)
printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
goto err_free_hash;
}
ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
sizeof(struct ip_conntrack_expect),
0, SLAB_HWCACHE_ALIGN, NULL, NULL);
if (!ip_conntrack_expect_cachep) {
printk(KERN_ERR "Unable to create ip_expect slab cache\n");
goto err_free_conntrack_slab;
}
/* Don't NEED lock here, but good form anyway. */
WRITE_LOCK(&ip_conntrack_lock);
/* Sew in builtin protocols. */
......@@ -1447,6 +1444,8 @@ int __init ip_conntrack_init(void)
return ret;
err_free_conntrack_slab:
kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
vfree(ip_conntrack_hash);
err_unreg_sockopt:
......
......@@ -19,6 +19,7 @@
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
#include <linux/moduleparam.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
......@@ -33,10 +34,10 @@ struct module *ip_conntrack_ftp = THIS_MODULE;
#define MAX_PORTS 8
static int ports[MAX_PORTS];
static int ports_c;
MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
module_param_array(ports, int, ports_c, 0400);
static int loose;
MODULE_PARM(loose, "i");
module_param(loose, int, 0600);
#if 0
#define DEBUGP printk
......@@ -420,10 +421,10 @@ static int __init init(void)
int i, ret;
char *tmpname;
if (ports[0] == 0)
ports[0] = FTP_PORT;
if (ports_c == 0)
ports[ports_c++] = FTP_PORT;
for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
for (i = 0; i < ports_c; i++) {
ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
ftp[i].tuple.dst.protonum = IPPROTO_TCP;
ftp[i].mask.src.u.tcp.port = 0xFFFF;
......@@ -449,7 +450,6 @@ static int __init init(void)
fini();
return ret;
}
ports_c++;
}
return 0;
}
......
......@@ -32,6 +32,7 @@
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
#include <linux/moduleparam.h>
#define MAX_PORTS 8
static int ports[MAX_PORTS];
......@@ -44,11 +45,11 @@ static char irc_buffer[65536];
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
MODULE_LICENSE("GPL");
MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
module_param_array(ports, int, ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of IRC servers");
MODULE_PARM(max_dcc_channels, "i");
module_param(max_dcc_channels, int, 0400);
MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
MODULE_PARM(dcc_timeout, "i");
module_param(dcc_timeout, int, 0400);
MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
static char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
......@@ -252,10 +253,10 @@ static int __init init(void)
}
/* If no port given, default to standard irc port */
if (ports[0] == 0)
ports[0] = IRC_PORT;
if (ports_c == 0)
ports[ports_c++] = IRC_PORT;
for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
for (i = 0; i < ports_c; i++) {
hlpr = &irc_helpers[i];
hlpr->tuple.src.u.tcp.port = htons(ports[i]);
hlpr->tuple.dst.protonum = IPPROTO_TCP;
......@@ -284,7 +285,6 @@ static int __init init(void)
fini();
return -EBUSY;
}
ports_c++;
}
return 0;
}
......
......@@ -50,9 +50,9 @@ static unsigned int generic_print_conntrack(char *buffer,
/* Returns verdict for packet, or -1 for invalid. */
static int packet(struct ip_conntrack *conntrack,
const struct sk_buff *skb,
enum ip_conntrack_info conntrackinfo)
enum ip_conntrack_info ctinfo)
{
ip_ct_refresh(conntrack, ip_ct_generic_timeout);
ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
return NF_ACCEPT;
}
......@@ -62,8 +62,14 @@ static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
return 1;
}
struct ip_conntrack_protocol ip_conntrack_generic_protocol
= { { NULL, NULL }, 0, "unknown",
generic_pkt_to_tuple, generic_invert_tuple, generic_print_tuple,
generic_print_conntrack, packet, new, NULL, NULL, NULL };
/*
 * Protocol descriptor registered under the name "unknown" with protocol
 * number 0.  Only the callbacks named below are set; every member that is
 * not listed is left zero-initialized by the designated initializer.
 * NOTE(review): presumably this is the fallback used when no dedicated
 * tracker exists for a protocol -- confirm against ip_ct_find_proto().
 */
struct ip_conntrack_protocol ip_conntrack_generic_protocol =
{
.proto = 0,
.name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
.print_tuple = generic_print_tuple,
.print_conntrack = generic_print_conntrack,
.packet = packet,
.new = new,
};
......@@ -12,6 +12,11 @@
#include <linux/netfilter.h>
#include <linux/in.h>
#include <linux/icmp.h>
#include <net/ip.h>
#include <net/checksum.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
unsigned long ip_ct_icmp_timeout = 30*HZ;
......@@ -94,7 +99,7 @@ static int icmp_packet(struct ip_conntrack *ct,
ct->timeout.function((unsigned long)ct);
} else {
atomic_inc(&ct->proto.icmp.count);
ip_ct_refresh(ct, ip_ct_icmp_timeout);
ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
}
return NF_ACCEPT;
......@@ -122,7 +127,147 @@ static int icmp_new(struct ip_conntrack *conntrack,
return 1;
}
struct ip_conntrack_protocol ip_conntrack_protocol_icmp
= { { NULL, NULL }, IPPROTO_ICMP, "icmp",
icmp_pkt_to_tuple, icmp_invert_tuple, icmp_print_tuple,
icmp_print_conntrack, icmp_packet, icmp_new, NULL, NULL, NULL };
/*
 * Try to attach an ICMP error packet to the tracked connection whose packet
 * it quotes.
 *
 * The ICMP header and the IP header embedded behind it are copied out of the
 * skb, the tuple of the quoted packet is extracted and inverted, and the
 * result is looked up with ip_conntrack_find_get().  On a match *ctinfo is
 * set to IP_CT_RELATED (plus IP_CT_IS_REPLY, depending on the direction of
 * the matching tuple hash) and skb->nfct is pointed at that conntrack.
 *
 * Returns -NF_ACCEPT once skb->nfct has been attached, and NF_ACCEPT on every
 * path where no connection was attached.
 * NOTE(review): how the caller interprets positive vs. negative return values
 * is not visible in this file -- confirm against the ->error() call site.
 */
static int
icmp_error_message(struct sk_buff *skb,
enum ip_conntrack_info *ctinfo,
unsigned int hooknum)
{
struct ip_conntrack_tuple innertuple, origtuple;
/* Bytes copied from the skb: ICMP header followed by the quoted IP header */
struct {
struct icmphdr icmp;
struct iphdr ip;
} inside;
struct ip_conntrack_protocol *innerproto;
struct ip_conntrack_tuple_hash *h;
int dataoff;
IP_NF_ASSERT(skb->nfct == NULL);
/* Not enough header? */
if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
return NF_ACCEPT;
/* Ignore ICMP's containing fragments (shouldn't happen) */
if (inside.ip.frag_off & htons(IP_OFFSET)) {
DEBUGP("icmp_error_track: fragment of proto %u\n",
inside.ip.protocol);
return NF_ACCEPT;
}
innerproto = ip_ct_find_proto(inside.ip.protocol);
/* Offset of the quoted packet's transport header: outer IP header,
   ICMP header, then the quoted IP header (length from its own ihl). */
dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
/* Are they talking about one of our connections? */
if (!ip_ct_get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
return NF_ACCEPT;
}
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
DEBUGP("icmp_error_track: Can't invert tuple\n");
return NF_ACCEPT;
}
*ctinfo = IP_CT_RELATED;
h = ip_conntrack_find_get(&innertuple, NULL);
if (!h) {
/* Locally generated ICMPs will match inverted if they
haven't been SNAT'ed yet */
/* FIXME: NAT code has to handle half-done double NAT --RR */
if (hooknum == NF_IP_LOCAL_OUT)
h = ip_conntrack_find_get(&origtuple, NULL);
if (!h) {
DEBUGP("icmp_error_track: no match\n");
return NF_ACCEPT;
}
/* Reverse direction from that found */
if (DIRECTION(h) != IP_CT_DIR_REPLY)
*ctinfo += IP_CT_IS_REPLY;
} else {
if (DIRECTION(h) == IP_CT_DIR_REPLY)
*ctinfo += IP_CT_IS_REPLY;
}
/* Update skb to refer to this connection */
skb->nfct = &h->ctrack->infos[*ctinfo];
return -NF_ACCEPT;
}
/* Small and modified version of icmp_rcv.
 *
 * Validates an ICMP packet before it is tracked: the ICMP header must be
 * readable, the checksum must verify (only checked when hooknum is
 * NF_IP_PRE_ROUTING) and the type must not exceed NR_ICMP_TYPES.  Failing
 * packets are optionally logged via nf_log_packet() and -NF_ACCEPT is
 * returned.  Packets whose type is not one of the five error types listed
 * below return NF_ACCEPT; the error types are handed to
 * icmp_error_message(), whose result is returned unchanged.
 */
static int
icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
unsigned int hooknum)
{
struct icmphdr icmph;
/* Not enough header? */
if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &icmph, sizeof(icmph))!=0) {
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_icmp: short packet ");
return -NF_ACCEPT;
}
/* See ip_conntrack_proto_tcp.c */
if (hooknum != NF_IP_PRE_ROUTING)
goto checksum_skipped;
switch (skb->ip_summed) {
case CHECKSUM_HW:
/* A folded checksum of zero means the checksum in skb->csum verified */
if (!(u16)csum_fold(skb->csum))
break;
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_icmp: bad HW ICMP checksum ");
return -NF_ACCEPT;
case CHECKSUM_NONE:
/* No checksum information available: compute it over the whole skb */
if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_icmp: bad ICMP checksum ");
return -NF_ACCEPT;
}
/* checksum is good: fall through to default */
default:
break;
}
checksum_skipped:
/*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
*
* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
* discarded.
*/
if (icmph.type > NR_ICMP_TYPES) {
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_icmp: invalid ICMP type ");
return -NF_ACCEPT;
}
/* Need to track icmp error message? */
if (icmph.type != ICMP_DEST_UNREACH
&& icmph.type != ICMP_SOURCE_QUENCH
&& icmph.type != ICMP_TIME_EXCEEDED
&& icmph.type != ICMP_PARAMETERPROB
&& icmph.type != ICMP_REDIRECT)
return NF_ACCEPT;
return icmp_error_message(skb, ctinfo, hooknum);
}
/*
 * Protocol descriptor for ICMP (IPPROTO_ICMP, printed as "icmp").
 * In addition to the tuple/print/packet/new callbacks it installs
 * icmp_error() as the ->error hook.  Members not named here are left
 * zero-initialized by the designated initializer.
 */
struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
{
.proto = IPPROTO_ICMP,
.name = "icmp",
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
.print_tuple = icmp_print_tuple,
.print_conntrack = icmp_print_conntrack,
.packet = icmp_packet,
.new = icmp_new,
.error = icmp_error,
};
/*
* Connection tracking protocol helper module for SCTP.
*
* SCTP is defined in RFC 2960. References to various sections in this code
* are to this RFC.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
/*
* Added support for proc manipulation of timeouts.
*/
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/sctp.h>
#include <linux/string.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/lockhelp.h>
#if 0
#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
#else
#define DEBUGP(format, args...)
#endif
/* Protects conntrack->proto.sctp */
static DECLARE_RWLOCK(sctp_lock);
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR
And so for me for SCTP :D -Kiran */
/*
 * Printable name of each connection state.  The array is indexed with the
 * state value (see sctp_print_conntrack() and the DEBUGP calls in
 * new_state()), so the order must stay in step with the sNO..sSA state
 * ordering used by the transition table.  There is no entry for
 * SCTP_CONNTRACK_MAX.
 */
static const char *sctp_conntrack_names[] = {
"NONE",
"CLOSED",
"COOKIE_WAIT",
"COOKIE_ECHOED",
"ESTABLISHED",
"SHUTDOWN_SENT",
"SHUTDOWN_RECD",
"SHUTDOWN_ACK_SENT",
};
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
unsigned long ip_ct_sctp_timeout_closed = 10 SECS;
unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS;
unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
unsigned long ip_ct_sctp_timeout_established = 5 DAYS;
unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
/*
 * Per-state timeout lookup: sctp_packet() indexes this array with the new
 * connection state and dereferences the entry to obtain the timeout in
 * jiffies.  Entries are pointers so that a changed timeout variable takes
 * effect without touching this table.
 * The SCTP_CONNTRACK_NONE slot is a null pointer and there is no slot for
 * SCTP_CONNTRACK_MAX, so neither value may be used as an index here.
 */
static unsigned long * sctp_timeouts[]
= { 0, /* SCTP_CONNTRACK_NONE */
&ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
&ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
&ip_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
&ip_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */
&ip_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
&ip_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
&ip_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
};
#define sNO SCTP_CONNTRACK_NONE
#define sCL SCTP_CONNTRACK_CLOSED
#define sCW SCTP_CONNTRACK_COOKIE_WAIT
#define sCE SCTP_CONNTRACK_COOKIE_ECHOED
#define sES SCTP_CONNTRACK_ESTABLISHED
#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
#define sIV SCTP_CONNTRACK_MAX
/*
These are the descriptions of the states:
NOTE: These state names are tantalizingly similar to the states of an
SCTP endpoint. But the interpretation of the states is a little different,
considering that these are the states of the connection and not of an end
point. Please note the subtleties. -Kiran
NONE - Nothing so far.
COOKIE WAIT - We have seen an INIT chunk in the original direction, or also
an INIT_ACK chunk in the reply direction.
COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction.
SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
to that of the SHUTDOWN chunk.
CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
the SHUTDOWN chunk. Connection is closed.
*/
/* TODO
- I have assumed that the first INIT is in the original direction.
This messes things when an INIT comes in the reply direction in CLOSED
state.
- Check the error type in the reply dir before transitioning from
cookie echoed to closed.
- Sec 5.2.4 of RFC 2960
- Multi Homing support.
*/
/* SCTP conntrack state transitions
 *
 * Indexed as sctp_conntracks[direction][chunk row][current state] and
 * yielding the next state.  The chunk row is the index chosen by
 * new_state(): 0 init, 1 init_ack, 2 abort, 3 shutdown, 4 shutdown_ack,
 * 5 error, 6 cookie_echo, 7 cookie_ack, 8 shutdown_comp.  The columns follow
 * the state order given in the header comment of each half.
 * sIV (SCTP_CONNTRACK_MAX) marks a transition that callers treat as invalid.
 */
static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie */
/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */
/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
},
{
/* REPLY */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */
/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
}
};
/*
 * Fill the SCTP part of a conntrack tuple from the packet.
 *
 * Copies the first 8 bytes of the SCTP common header found at dataoff and
 * stores its source and destination ports (still in network byte order) in
 * the tuple.  Returns 1 on success, 0 if the header could not be copied.
 */
static int sctp_pkt_to_tuple(const struct sk_buff *skb,
			     unsigned int dataoff,
			     struct ip_conntrack_tuple *tuple)
{
	sctp_sctphdr_t sctph;
	int copy_failed;

	DEBUGP(__FUNCTION__);
	DEBUGP("\n");

	/* The ports live in the first 8 bytes; the rest is not needed. */
	copy_failed = skb_copy_bits(skb, dataoff, &sctph, 8);
	if (copy_failed)
		return 0;

	tuple->dst.u.sctp.port = sctph.dest;
	tuple->src.u.sctp.port = sctph.source;

	return 1;
}
/*
 * Build the SCTP part of the reverse-direction tuple: the source port of
 * the result is the destination port of orig and vice versa.
 * Always returns 1.
 */
static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_tuple *orig)
{
DEBUGP(__FUNCTION__);
DEBUGP("\n");
tuple->src.u.sctp.port = orig->dst.u.sctp.port;
tuple->dst.u.sctp.port = orig->src.u.sctp.port;
return 1;
}
/*
 * Format the SCTP-specific part of a tuple ("sport=<n> dport=<n> ") into
 * buffer, with both ports converted to host byte order.
 * Returns the number of characters written, as reported by sprintf().
 */
static unsigned int sctp_print_tuple(char *buffer,
				     const struct ip_conntrack_tuple *tuple)
{
	unsigned short sport, dport;

	DEBUGP(__FUNCTION__);
	DEBUGP("\n");

	sport = ntohs(tuple->src.u.sctp.port);
	dport = ntohs(tuple->dst.u.sctp.port);

	return sprintf(buffer, "sport=%hu dport=%hu ", sport, dport);
}
/* Print out the private part of the conntrack. */
static unsigned int sctp_print_conntrack(char *buffer,
const struct ip_conntrack *conntrack)
{
enum sctp_conntrack state;
DEBUGP(__FUNCTION__);
DEBUGP("\n");
READ_LOCK(&sctp_lock);
state = conntrack->proto.sctp.state;
READ_UNLOCK(&sctp_lock);
return sprintf(buffer, "%s ", sctp_conntrack_names[state]);
}
/*
 * Iterate over the chunk headers of the SCTP packet in skb.
 *
 * offset starts right behind the IP header and the SCTP common header and
 * count starts at 0.  On every pass the chunk header at offset is copied
 * into sch; iteration ends when offset reaches skb->len or the copy fails.
 * After each pass offset advances by the chunk's length field rounded up to
 * a multiple of 4, and count is incremented.
 *
 * Note: a chunk whose length field is 0 advances offset by 0, so the loop
 * body must reject such a chunk itself or the iteration never terminates.
 * Note: skb, sch and offset are expanded more than once and without
 * parentheses, so pass plain variable names only.
 */
#define for_each_sctp_chunk(skb, sch, offset, count) \
for (offset = skb->nh.iph->ihl * 4 + sizeof (sctp_sctphdr_t), count = 0; \
offset < skb->len && !skb_copy_bits(skb, offset, &sch, sizeof(sch)); \
offset += (htons(sch.length) + 3) & ~3, count++)
/*
 * Some validity checks to make sure the chunks are fine.
 *
 * Walks every chunk header in the packet and rejects the packet when
 *  - a chunk carries a length field of 0 (for_each_sctp_chunk advances by
 *    the chunk length, so such a chunk would stall the walk forever), or
 *  - a COOKIE_ACK / COOKIE_ECHO chunk is not the first chunk, or
 *  - an INIT / INIT_ACK / SHUTDOWN_COMPLETE chunk is followed by any other
 *    chunk.
 *
 * conntrack: unused by the checks themselves.
 * skb:       packet to inspect.
 * map:       optional bitmap (may be NULL); when given, the bit numbered by
 *            each accepted chunk's type is set, so the caller can later test
 *            which chunk types were present.
 *
 * Returns 0 if the packet passed, 1 if it must be treated as invalid.
 */
static int do_basic_checks(struct ip_conntrack *conntrack,
			   const struct sk_buff *skb,
			   char *map)
{
	u_int32_t offset, count;
	sctp_chunkhdr_t sch;
	int flag;

	DEBUGP(__FUNCTION__);
	DEBUGP("\n");

	flag = 0;

	for_each_sctp_chunk (skb, sch, offset, count) {
		DEBUGP("Chunk Num: %d Type: %d\n", count, sch.type);

		/* A zero-length chunk would never advance the iterator:
		   refuse the packet instead of looping forever. */
		if (sch.length == 0) {
			DEBUGP("Basic checks failed\n");
			return 1;
		}

		if (sch.type == SCTP_CID_INIT
		    || sch.type == SCTP_CID_INIT_ACK
		    || sch.type == SCTP_CID_SHUTDOWN_COMPLETE) {
			flag = 1;
		}

		/* Cookie Ack/Echo chunks not the first OR
		   Init / Init Ack / Shutdown compl chunks not the only chunks */
		if ((sch.type == SCTP_CID_COOKIE_ACK
		     || sch.type == SCTP_CID_COOKIE_ECHO
		     || flag)
		    && count != 0) {
			DEBUGP("Basic checks failed\n");
			return 1;
		}

		if (map) {
			set_bit (sch.type, (void *)map);
		}
	}

	DEBUGP("Basic checks passed\n");
	return 0;
}
static int new_state(enum ip_conntrack_dir dir,
enum sctp_conntrack cur_state,
int chunk_type)
{
int i;
DEBUGP(__FUNCTION__);
DEBUGP("\n");
DEBUGP("Chunk type: %d\n", chunk_type);
switch (chunk_type) {
case SCTP_CID_INIT:
DEBUGP("SCTP_CID_INIT\n");
i = 0; break;
case SCTP_CID_INIT_ACK:
DEBUGP("SCTP_CID_INIT_ACK\n");
i = 1; break;
case SCTP_CID_ABORT:
DEBUGP("SCTP_CID_ABORT\n");
i = 2; break;
case SCTP_CID_SHUTDOWN:
DEBUGP("SCTP_CID_SHUTDOWN\n");
i = 3; break;
case SCTP_CID_SHUTDOWN_ACK:
DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
i = 4; break;
case SCTP_CID_ERROR:
DEBUGP("SCTP_CID_ERROR\n");
i = 5; break;
case SCTP_CID_COOKIE_ECHO:
DEBUGP("SCTP_CID_COOKIE_ECHO\n");
i = 6; break;
case SCTP_CID_COOKIE_ACK:
DEBUGP("SCTP_CID_COOKIE_ACK\n");
i = 7; break;
case SCTP_CID_SHUTDOWN_COMPLETE:
DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
i = 8; break;
default:
/* Other chunks like DATA, SACK, HEARTBEAT and
its ACK do not cause a change in state */
DEBUGP("Unknown chunk type, Will stay in %s\n",
sctp_conntrack_names[cur_state]);
return cur_state;
}
DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
dir, sctp_conntrack_names[cur_state], chunk_type,
sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
return sctp_conntracks[dir][i][cur_state];
}
/* Returns verdict for packet, or -1 for invalid. */
static int sctp_packet(struct ip_conntrack *conntrack,
const struct sk_buff *skb,
enum ip_conntrack_info ctinfo)
{
enum sctp_conntrack newconntrack, oldsctpstate;
sctp_sctphdr_t sctph;
sctp_chunkhdr_t sch;
u_int32_t offset, count;
char map[256 / sizeof (char)] = {0};
DEBUGP(__FUNCTION__);
DEBUGP("\n");
if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &sctph, sizeof(sctph)) != 0)
return -1;
if (do_basic_checks(conntrack, skb, map) != 0)
return -1;
/* Check the verification tag (Sec 8.5) */
if (!test_bit(SCTP_CID_INIT, (void *)map)
&& !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
&& !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
&& !test_bit(SCTP_CID_ABORT, (void *)map)
&& !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
&& (sctph.vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
DEBUGP("Verification tag check failed\n");
return -1;
}
oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
for_each_sctp_chunk (skb, sch, offset, count) {
WRITE_LOCK(&sctp_lock);
/* Special cases of Verification tag check (Sec 8.5.1) */
if (sch.type == SCTP_CID_INIT) {
/* Sec 8.5.1 (A) */
if (sctph.vtag != 0) {
WRITE_UNLOCK(&sctp_lock);
return -1;
}
} else if (sch.type == SCTP_CID_ABORT) {
/* Sec 8.5.1 (B) */
if (!(sctph.vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
&& !(sctph.vtag == conntrack->proto.sctp.vtag
[1 - CTINFO2DIR(ctinfo)])) {
WRITE_UNLOCK(&sctp_lock);
return -1;
}
} else if (sch.type == SCTP_CID_SHUTDOWN_COMPLETE) {
/* Sec 8.5.1 (C) */
if (!(sctph.vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
&& !(sctph.vtag == conntrack->proto.sctp.vtag
[1 - CTINFO2DIR(ctinfo)]
&& (sch.flags & 1))) {
WRITE_UNLOCK(&sctp_lock);
return -1;
}
} else if (sch.type == SCTP_CID_COOKIE_ECHO) {
/* Sec 8.5.1 (D) */
if (!(sctph.vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
WRITE_UNLOCK(&sctp_lock);
return -1;
}
}
oldsctpstate = conntrack->proto.sctp.state;
newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch.type);
/* Invalid */
if (newconntrack == SCTP_CONNTRACK_MAX) {
DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
CTINFO2DIR(ctinfo), sch.type, oldsctpstate);
WRITE_UNLOCK(&sctp_lock);
return -1;
}
/* If it is an INIT or an INIT ACK note down the vtag */
if (sch.type == SCTP_CID_INIT
|| sch.type == SCTP_CID_INIT_ACK) {
sctp_inithdr_t inithdr;
if (skb_copy_bits(skb, offset + sizeof (sctp_chunkhdr_t),
&inithdr, sizeof(inithdr)) != 0) {
WRITE_UNLOCK(&sctp_lock);
return -1;
}
DEBUGP("Setting vtag %x for dir %d\n",
inithdr.init_tag, CTINFO2DIR(ctinfo));
conntrack->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = inithdr.init_tag;
}
conntrack->proto.sctp.state = newconntrack;
WRITE_UNLOCK(&sctp_lock);
}
ip_ct_refresh(conntrack, *sctp_timeouts[newconntrack]);
if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
&& CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
DEBUGP("Setting assured bit\n");
set_bit(IPS_ASSURED_BIT, &conntrack->status);
}
return NF_ACCEPT;
}
/*
 * Called when a new connection for this protocol found.
 *
 * Validates the first packet of a would-be connection, derives the initial
 * state from its chunks (always evaluated as original direction, starting
 * from SCTP_CONNTRACK_NONE) and records the verification tag expected in
 * reply packets.
 *
 * Returns 1 if the conntrack entry should be kept and 0 if it must be
 * discarded.  Every rejection path returns 0: a negative value is non-zero
 * and would be indistinguishable from "accept" for a caller that tests the
 * result for truth, which is how the "Invalid: delete conntrack" path below
 * already signals rejection.
 */
static int sctp_new(struct ip_conntrack *conntrack,
		    const struct sk_buff *skb)
{
	enum sctp_conntrack newconntrack;
	sctp_sctphdr_t sctph;
	sctp_chunkhdr_t sch;
	u_int32_t offset, count;
	char map[256 / sizeof (char)] = {0};

	DEBUGP(__FUNCTION__);
	DEBUGP("\n");

	if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &sctph, sizeof(sctph)) != 0)
		return 0;

	if (do_basic_checks(conntrack, skb, map) != 0)
		return 0;

	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
	if ((test_bit (SCTP_CID_ABORT, (void *)map))
	    || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
	    || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
		return 0;
	}

	newconntrack = SCTP_CONNTRACK_MAX;
	for_each_sctp_chunk (skb, sch, offset, count) {
		/* Don't need lock here: this conntrack not in circulation yet */
		newconntrack = new_state (IP_CT_DIR_ORIGINAL,
					  SCTP_CONNTRACK_NONE, sch.type);

		/* Invalid: delete conntrack */
		if (newconntrack == SCTP_CONNTRACK_MAX) {
			DEBUGP("ip_conntrack_sctp: invalid new deleting.\n");
			return 0;
		}

		/* Copy the vtag into the state info */
		if (sch.type == SCTP_CID_INIT) {
			if (sctph.vtag == 0) {
				sctp_inithdr_t inithdr;

				if (skb_copy_bits(skb, offset + sizeof (sctp_chunkhdr_t),
						  &inithdr, sizeof(inithdr)) != 0) {
					return 0;
				}

				DEBUGP("Setting vtag %x for new conn\n",
				       inithdr.init_tag);

				conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
					inithdr.init_tag;
			} else {
				/* Sec 8.5.1 (A) */
				return 0;
			}
		}
		/* If it is a shutdown ack OOTB packet, we expect a return
		   shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
		else {
			DEBUGP("Setting vtag %x for new conn OOTB\n",
			       sctph.vtag);
			conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sctph.vtag;
		}

		conntrack->proto.sctp.state = newconntrack;
	}

	return 1;
}
/*
 * Placeholder for matching a packet against an expectation.
 * Not implemented yet: both arguments are ignored and 0 is always returned.
 */
static int sctp_exp_matches_pkt(struct ip_conntrack_expect *exp,
const struct sk_buff *skb)
{
/* To be implemented */
return 0;
}
/*
 * Protocol descriptor for SCTP (IPPROTO_SCTP, printed as "sctp").  This is
 * the object handed to ip_conntrack_protocol_register() and
 * ip_conntrack_protocol_unregister() by init() and fini().
 * No ->destroy callback is provided.
 */
struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
.list = { NULL, NULL },
.proto = IPPROTO_SCTP,
.name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
.print_tuple = sctp_print_tuple,
.print_conntrack = sctp_print_conntrack,
.packet = sctp_packet,
.new = sctp_new,
.destroy = NULL,
.exp_matches_pkt = sctp_exp_matches_pkt,
.me = THIS_MODULE
};
#ifdef CONFIG_SYSCTL
/*
 * One sysctl entry per SCTP state timeout.  Each entry exposes the matching
 * ip_ct_sctp_timeout_* variable with mode 0644 through
 * proc_dointvec_jiffies.  The list is terminated by an entry whose ctl_name
 * is 0.
 * NOTE(review): .data points at unsigned long variables while .maxlen is
 * sizeof(unsigned int) -- confirm this is what proc_dointvec_jiffies expects
 * on platforms where the two sizes differ.
 */
static ctl_table ip_ct_sysctl_table[] = {
{
.ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
.procname = "ip_conntrack_sctp_timeout_closed",
.data = &ip_ct_sctp_timeout_closed,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
.procname = "ip_conntrack_sctp_timeout_cookie_wait",
.data = &ip_ct_sctp_timeout_cookie_wait,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
.procname = "ip_conntrack_sctp_timeout_cookie_echoed",
.data = &ip_ct_sctp_timeout_cookie_echoed,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
.procname = "ip_conntrack_sctp_timeout_established",
.data = &ip_ct_sctp_timeout_established,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
.procname = "ip_conntrack_sctp_timeout_shutdown_sent",
.data = &ip_ct_sctp_timeout_shutdown_sent,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
.procname = "ip_conntrack_sctp_timeout_shutdown_recd",
.data = &ip_ct_sctp_timeout_shutdown_recd,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
.procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
.data = &ip_ct_sctp_timeout_shutdown_ack_sent,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{ .ctl_name = 0 }
};
/* Directory entry "netfilter" (mode 0555) whose children are the SCTP
   timeout entries of ip_ct_sysctl_table. */
static ctl_table ip_ct_netfilter_table[] = {
{
.ctl_name = NET_IPV4_NETFILTER,
.procname = "netfilter",
.mode = 0555,
.child = ip_ct_sysctl_table,
},
{ .ctl_name = 0 }
};
/* Directory entry "ipv4" (mode 0555) containing the "netfilter" directory
   of ip_ct_netfilter_table. */
static ctl_table ip_ct_ipv4_table[] = {
{
.ctl_name = NET_IPV4,
.procname = "ipv4",
.mode = 0555,
.child = ip_ct_netfilter_table,
},
{ .ctl_name = 0 }
};
/* Root of the sysctl tree registered by init(): directory entry "net"
   (mode 0555) containing the "ipv4" directory of ip_ct_ipv4_table. */
static ctl_table ip_ct_net_table[] = {
{
.ctl_name = CTL_NET,
.procname = "net",
.mode = 0555,
.child = ip_ct_ipv4_table,
},
{ .ctl_name = 0 }
};
static struct ctl_table_header *ip_ct_sysctl_header;
#endif
/*
 * Module entry point: register the SCTP protocol tracker and, when
 * CONFIG_SYSCTL is set, the sysctl tree with the timeout knobs.
 *
 * Returns 0 on success.  If protocol registration fails its error code is
 * returned.  If the sysctl tree cannot be registered the protocol is
 * unregistered again and -ENOMEM is returned, so that a half-initialized
 * module is never reported as loaded.
 */
int __init init(void)
{
	int ret;

	ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp);
	if (ret) {
		printk("ip_conntrack_proto_sctp: protocol register failed\n");
		goto out;
	}

#ifdef CONFIG_SYSCTL
	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
	if (ip_ct_sysctl_header == NULL) {
		/* ret is still 0 here; without an error code the load would
		   be reported as successful after the rollback below. */
		ret = -ENOMEM;
		printk("ip_conntrack_proto_sctp: can't register to sysctl.\n");
		goto cleanup;
	}
#endif

	return ret;

cleanup:
#ifdef CONFIG_SYSCTL
	ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
#endif
out:
	DEBUGP("SCTP conntrack module loading %s\n",
	       ret ? "failed": "succeeded");
	return ret;
}
/*
 * Module exit point: unregister the SCTP protocol tracker and, when
 * CONFIG_SYSCTL is set, the sysctl tree registered by init().
 */
void __exit fini(void)
{
ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(ip_ct_sysctl_header);
#endif
DEBUGP("SCTP conntrack module unloaded\n");
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kiran Kumar Immidi");
MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
......@@ -4,8 +4,22 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
* - Real stateful connection tracking
* - Modified state transitions table
* - Window scaling support added
* - SACK support added
*
* Willy Tarreau:
* - State table bugfixes
* - More robust state changes
* - Tuning timer parameters
*
* version 2.2
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
......@@ -14,16 +28,18 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <net/tcp.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/lockhelp.h>
#if 0
#define DEBUGP printk
#define DEBUGP_VARS
#else
#define DEBUGP(format, args...)
#endif
......@@ -31,28 +47,40 @@
/* Protects conntrack->proto.tcp */
static DECLARE_RWLOCK(tcp_lock);
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR */
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
int ip_ct_tcp_be_liberal = 0;
/* Actually, I believe that neither ipmasq (where this code is stolen
from) nor ipfilter do it exactly right. A new conntrack machine taking
into account packet loss (which creates uncertainty as to exactly
the conntrack of the connection) is required. RSN. --RR */
/* When connection is picked up from the middle, how many packets are required
to pass in each direction when we assume we are in sync - if any side uses
window scaling, we lost the game.
If it is set to zero, we disable picking up already established
connections. */
int ip_ct_tcp_loose = 3;
/* Max number of the retransmitted packets without receiving an (acceptable)
ACK from the destination. If this number is reached, a shorter timer
will be started. */
int ip_ct_tcp_max_retrans = 3;
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR */
static const char *tcp_conntrack_names[] = {
"NONE",
"ESTABLISHED",
"SYN_SENT",
"SYN_RECV",
"ESTABLISHED",
"FIN_WAIT",
"TIME_WAIT",
"CLOSE",
"CLOSE_WAIT",
"LAST_ACK",
"TIME_WAIT",
"CLOSE",
"LISTEN"
};
#define SECS *HZ
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
......@@ -66,55 +94,204 @@ unsigned long ip_ct_tcp_timeout_last_ack = 30 SECS;
unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS;
unsigned long ip_ct_tcp_timeout_close = 10 SECS;
/* RFC1122 says the R2 limit should be at least 100 seconds.
Linux uses 15 packets as limit, which corresponds
to ~13-30min depending on RTO. */
unsigned long ip_ct_tcp_timeout_max_retrans = 5 MINS;
static unsigned long * tcp_timeouts[]
= { NULL, /* TCP_CONNTRACK_NONE */
&ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
&ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
&ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
&ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
&ip_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */
&ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
&ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
&ip_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */
&ip_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */
&ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
&ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
NULL, /* TCP_CONNTRACK_LISTEN */
};
#define sNO TCP_CONNTRACK_NONE
#define sES TCP_CONNTRACK_ESTABLISHED
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sLI TCP_CONNTRACK_LISTEN
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE
static enum tcp_conntrack tcp_conntracks[2][5][TCP_CONNTRACK_MAX] = {
/* What TCP flags are set from RST/SYN/FIN/ACK. */
/*
 * Classification of a TCP segment by its flag combination.
 * The enumerator order matches the row order of the tcp_conntracks
 * transition table (syn, synack, fin, ack, rst, none), so the values must
 * not be reordered independently of that table.
 */
enum tcp_bit_set {
TCP_SYN_SET,
TCP_SYNACK_SET,
TCP_FIN_SET,
TCP_ACK_SET,
TCP_RST_SET,
TCP_NONE_SET,
};
/*
* The TCP state transition table needs a few words...
*
* We are the man in the middle. All the packets go through us
* but might get lost in transit to the destination.
* It is assumed that the destinations can't receive segments
* we haven't seen.
*
* The checked segment is in window, but our windows are *not*
* equivalent with the ones of the sender/receiver. We always
* try to guess the state of the current sender.
*
* The meaning of the states are:
*
* NONE: initial state
* SYN_SENT: SYN-only packet seen
* SYN_RECV: SYN-ACK packet seen
* ESTABLISHED: ACK packet seen
* FIN_WAIT: FIN packet seen
* CLOSE_WAIT: ACK seen (after FIN)
* LAST_ACK: FIN seen (after FIN)
* TIME_WAIT: last ACK seen
* CLOSE: closed connection
*
* LISTEN state is not used.
*
* Packets marked as IGNORED (sIG):
* if they may be either invalid or valid
* and the receiver may send back a connection
* closing RST or a SYN/ACK.
*
* Packets marked as INVALID (sIV):
* if they are invalid
* or we do not support the request (simultaneous open)
*/
static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */
/*syn*/ {sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI },
/*fin*/ {sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI },
/*ack*/ {sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES },
/*rst*/ {sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL },
/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
/* ORIGINAL */
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
/*
* sNO -> sSS Initialize a new connection
* sSS -> sSS Retransmitted SYN
* sSR -> sIG Late retransmitted SYN?
* sES -> sIG Error: SYNs in window outside the SYN_SENT state
* are errors. Receiver will reply with RST
* and close the connection.
* Or we are not in sync and hold a dead connection.
* sFW -> sIG
* sCW -> sIG
* sLA -> sIG
* sTW -> sSS Reopened connection (RFC 1122).
* sCL -> sSS
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
/*
* A SYN/ACK from the client is always invalid:
* - either it tries to set up a simultaneous open, which is
* not supported;
* - or the firewall has just been inserted between the two hosts
* during the session set-up. The SYN will be retransmitted
* by the true client (or it'll time out).
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
* sNO -> sIV Too late and no reason to do anything...
* sSS -> sIV Client might not send FIN in this state:
* we enforce waiting for a SYN/ACK reply first.
* sSR -> sFW Close started.
* sES -> sFW
* sFW -> sLA FIN seen in both directions, waiting for
* the last ACK.
* Might be a retransmitted FIN as well...
* sCW -> sLA
* sLA -> sLA Retransmitted FIN. Remain in the same state.
* sTW -> sTW
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
* sNO -> sES Assumed.
* sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
* sSR -> sES Established state is reached.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
* sCW -> sCW
* sLA -> sTW Last ACK detected.
* sTW -> sTW Retransmitted last ACK. Remain in the same state.
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
},
{
/* REPLY */
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */
/*syn*/ {sSR, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR },
/*fin*/ {sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI },
/*ack*/ {sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI },
/*rst*/ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sLA, sLI },
/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
}
/* REPLY */
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
/*
* sNO -> sIV Never reached.
* sSS -> sIV Simultaneous open, not supported
* sSR -> sIV Simultaneous open, not supported.
* sES -> sIV Server may not initiate a connection.
* sFW -> sIV
* sCW -> sIV
* sLA -> sIV
* sTW -> sIV Reopened connection, but server may not do it.
* sCL -> sIV
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
/*
* sSS -> sSR Standard open.
* sSR -> sSR Retransmitted SYN/ACK.
* sES -> sIG Late retransmitted SYN/ACK?
* sFW -> sIG
* sCW -> sIG
* sLA -> sIG
* sTW -> sIG
* sCL -> sIG
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
* sSS -> sIV Server might not send FIN in this state.
* sSR -> sFW Close started.
* sES -> sFW
* sFW -> sLA FIN seen in both directions.
* sCW -> sLA
* sLA -> sLA Retransmitted FIN.
* sTW -> sTW
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*ack*/ { sIV, sIV, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
* sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
* sSR -> sIV Simultaneous open.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
* sCW -> sCW
* sLA -> sTW Last ACK detected.
* sTW -> sTW Retransmitted last ACK.
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
}
};
static int tcp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct ip_conntrack_tuple *tuple)
unsigned int dataoff,
struct ip_conntrack_tuple *tuple)
{
struct tcphdr hdr;
......@@ -160,11 +337,488 @@ static unsigned int tcp_print_conntrack(char *buffer,
/*
 * Classify a TCP header by its flag bits and return the matching row
 * index of the tcp_conntracks state table.
 *
 * RST wins over everything, then SYN (split into SYN and SYN/ACK),
 * then FIN, then a bare ACK; a header with none of these set maps to
 * TCP_NONE_SET.
 *
 * The earlier numeric returns (3/0/1/2/4) were written for a table
 * without a SYN/ACK row and made the symbolic returns unreachable,
 * so only the symbolic chain is kept.
 */
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
{
	if (tcph->rst)
		return TCP_RST_SET;
	if (tcph->syn)
		return tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET;
	if (tcph->fin)
		return TCP_FIN_SET;
	if (tcph->ack)
		return TCP_ACK_SET;
	return TCP_NONE_SET;
}
/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
in IP Filter' by Guido van Rooij.
http://www.nluug.nl/events/sane2000/papers.html
http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
The boundaries and the conditions are slightly changed:
td_maxend = max(sack + max(win,1)) seen in reply packets
td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
td_end = max(seq + len) seen in sent packets
I. Upper bound for valid data: seq + len <= sender.td_maxend
II. Lower bound for valid data: seq >= sender.td_end - receiver.td_maxwin
III. Upper bound for valid ack: sack <= receiver.td_end
IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
where sack is the highest right edge of sack block found in the packet.
The upper bound limit for a valid ack is not ignored -
we don't have to deal with fragments.
*/
/*
 * Sequence number just past the last byte of this segment.
 *
 * @seq:  sequence number of the segment (host byte order)
 * @len:  total packet length, IP header included
 * @iph:  IP header, used for its header length
 * @tcph: TCP header, used for its header length and SYN/FIN flags
 *
 * The payload length is @len minus both header lengths; SYN and FIN each
 * occupy one extra sequence number.  The result wraps modulo 2^32.
 */
static inline __u32 segment_seq_plus_len(__u32 seq,
					 size_t len,
					 struct iphdr *iph,
					 struct tcphdr *tcph)
{
	__u32 hdr_bytes = (iph->ihl + tcph->doff) * 4;
	__u32 seg_end = seq + len - hdr_bytes;

	if (tcph->syn)
		seg_end++;
	if (tcph->fin)
		seg_end++;

	return seg_end;
}
/* Fixme: what about big packets? */
/*
 * MAXACKWINCONST is the floor for the ACK window; MAXACKWINDOW(sender)
 * expands to the larger of (sender)->td_maxwin and that floor.
 * tcp_in_window() uses it as the distance below receiver->td_end that an
 * ACK may still lie (rule IV in the window tracking comment).
 * The argument is expanded more than once, so pass a side-effect free
 * expression.
 */
#define MAXACKWINCONST 66000
#define MAXACKWINDOW(sender) \
((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
: MAXACKWINCONST)
/*
 * Simplified tcp_parse_options routine from tcp_input.c
 *
 * Walks the option bytes that follow the fixed TCP header and records in
 * @state whether the SACK-permitted and window-scale options are present.
 *
 * @tcph:  TCP header; the options are read from the bytes directly after
 *         it, tcph->doff*4 - sizeof(struct tcphdr) of them
 * @state: per-direction state; td_scale and flags are reset to 0 first and
 *         then filled in from the options found
 *
 * Parsing stops at end-of-list, at a malformed option (length < 2) or at
 * an option that claims to extend past the header.
 */
static void tcp_options(struct tcphdr *tcph,
			struct ip_ct_tcp_state *state)
{
	unsigned char *ptr = (unsigned char *)(tcph + 1);
	int length = (tcph->doff*4) - sizeof(struct tcphdr);

	state->td_scale =
	state->flags = 0;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			/* The length byte itself must lie inside the header. */
			if (length < 2)
				return;
			opsize = *ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			/*
			 * Don't parse partial options.  This must leave the
			 * function: a plain break would only leave the switch
			 * and the loop would keep reading with length
			 * unchanged, walking past the end of the header.
			 */
			if (opsize > length)
				return;

			if (opcode == TCPOPT_SACK_PERM
			    && opsize == TCPOLEN_SACK_PERM)
				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
			else if (opcode == TCPOPT_WINDOW
				 && opsize == TCPOLEN_WINDOW) {
				state->td_scale = *(u_int8_t *)ptr;

				if (state->td_scale > 14) {
					/* See RFC1323 */
					state->td_scale = 14;
				}
				state->flags |=
					IP_CT_TCP_STATE_FLAG_WINDOW_SCALE;
			}
			/* ptr is already past kind and length: skip the payload */
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}
/*
 * Scan the TCP options for a SACK option and raise *@sack to the highest
 * right edge found in its blocks.
 *
 * @tcph: TCP header; options are read from the bytes directly after it
 * @sack: in/out, host byte order.  Only ever moved forward (after());
 *        left untouched when no valid SACK option is present.
 */
static void tcp_sack(struct tcphdr *tcph, __u32 *sack)
{
	__u32 tmp;
	unsigned char *ptr;
	int length = (tcph->doff*4) - sizeof(struct tcphdr);

	/*
	 * Fast path for timestamp-only option.  length is in bytes and
	 * TCPOLEN_TSTAMP_ALIGNED is a byte count as well (12 in net/tcp.h),
	 * so they are compared directly; multiplying by 4 gave a value
	 * larger than any possible option area and the fast path never hit.
	 */
	if (length == TCPOLEN_TSTAMP_ALIGNED
	    && *(__u32 *)(tcph + 1) ==
	        __constant_ntohl((TCPOPT_NOP << 24)
				 | (TCPOPT_NOP << 16)
				 | (TCPOPT_TIMESTAMP << 8)
				 | TCPOLEN_TIMESTAMP))
		return;

	ptr = (unsigned char *)(tcph + 1);

	while (length > 0) {
		int opcode = *ptr++;
		int opsize, i;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			/* The length byte itself must lie inside the header. */
			if (length < 2)
				return;
			opsize = *ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			/*
			 * Don't parse partial options.  Return rather than
			 * break: break only leaves the switch and the loop
			 * would continue with length unchanged.
			 */
			if (opsize > length)
				return;

			if (opcode == TCPOPT_SACK
			    && opsize >= (TCPOLEN_SACK_BASE
					  + TCPOLEN_SACK_PERBLOCK)
			    && !((opsize - TCPOLEN_SACK_BASE)
				 % TCPOLEN_SACK_PERBLOCK)) {
				for (i = 0;
				     i < (opsize - TCPOLEN_SACK_BASE);
				     i += TCPOLEN_SACK_PERBLOCK) {
					/* second word of each block = right edge */
					tmp = ntohl(*((u_int32_t *)(ptr+i)+1));

					if (after(tmp, *sack))
						*sack = tmp;
				}
				return;
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}
/*
 * Check one TCP segment against the tracked windows and, when it is
 * accepted, fold its seq/ack/window values into the per-direction state.
 *
 * @state: TCP tracking data of the connection; seen[dir] is treated as the
 *         sender of this packet and seen[!dir] as the receiver
 * @dir:   direction the packet travels in
 * @index: in/out flag class of the packet; rewritten from TCP_FIN_SET to
 *         TCP_ACK_SET when the segment is trimmed at sender->td_maxend
 * @skb:   the packet (used for its length and for logging)
 * @iph:   IP header of the packet
 * @tcph:  TCP header including options
 *
 * Returns 1 when the packet is accepted.  Otherwise the packet is logged
 * (if LOG_INVALID is on) and the result is
 * ip_ct_tcp_be_liberal && !tcph->rst.
 *
 * The function itself takes no lock; it reads and writes *state throughout.
 */
static int tcp_in_window(struct ip_ct_tcp *state,
enum ip_conntrack_dir dir,
unsigned int *index,
const struct sk_buff *skb,
struct iphdr *iph,
struct tcphdr *tcph)
{
struct ip_ct_tcp_state *sender = &state->seen[dir];
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
__u32 seq, ack, sack, end, win, swin;
int res;
/*
* Get the required data from the packet.
*/
seq = ntohl(tcph->seq);
ack = sack = ntohl(tcph->ack_seq);
win = ntohs(tcph->window);
end = segment_seq_plus_len(seq, skb->len, iph, tcph);
/* sack starts out equal to ack and is only raised by a SACK option */
if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
tcp_sack(tcph, &sack);
DEBUGP("tcp_in_window: START\n");
DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
"seq=%u ack=%u sack=%u win=%u end=%u\n",
NIPQUAD(iph->saddr), ntohs(tcph->source),
NIPQUAD(iph->daddr), ntohs(tcph->dest),
seq, ack, sack, win, end);
DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
sender->td_scale,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
/* td_end == 0 is used as "nothing recorded for this direction yet" */
if (sender->td_end == 0) {
/*
* Initialize sender data.
*/
if (tcph->syn && tcph->ack) {
/*
* Outgoing SYN-ACK in reply to a SYN.
*/
sender->td_end =
sender->td_maxend = end;
sender->td_maxwin = (win == 0 ? 1 : win);
tcp_options(tcph, sender);
/*
* RFC 1323:
* Both sides must send the Window Scale option
* to enable window scaling in either direction.
*/
if (!(sender->flags & IP_CT_TCP_STATE_FLAG_WINDOW_SCALE
&& receiver->flags & IP_CT_TCP_STATE_FLAG_WINDOW_SCALE))
sender->td_scale =
receiver->td_scale = 0;
} else {
/*
* We are in the middle of a connection,
* its history is lost for us.
* Let's try to use the data from the packet.
*/
sender->td_end = end;
sender->td_maxwin = (win == 0 ? 1 : win);
sender->td_maxend = end + sender->td_maxwin;
}
} else if (state->state == TCP_CONNTRACK_SYN_SENT
&& dir == IP_CT_DIR_ORIGINAL
&& after(end, sender->td_end)) {
/*
* RFC 793: "if a TCP is reinitialized ... then it need
* not wait at all; it must only be sure to use sequence
* numbers larger than those recently used."
*/
sender->td_end =
sender->td_maxend = end;
sender->td_maxwin = (win == 0 ? 1 : win);
tcp_options(tcph, sender);
}
if (!(tcph->ack)) {
/*
* If there is no ACK, just pretend it was set and OK.
*/
ack = sack = receiver->td_end;
} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
(TCP_FLAG_ACK|TCP_FLAG_RST))
&& (ack == 0)) {
/*
* Broken TCP stacks, that set ACK in RST packets as well
* with zero ack value.
*/
ack = sack = receiver->td_end;
}
if (seq == end)
/*
* Packets contains no data: we assume it is valid
* and check the ack value only.
*/
seq = end = sender->td_end;
DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
"seq=%u ack=%u sack =%u win=%u end=%u trim=%u\n",
NIPQUAD(iph->saddr), ntohs(tcph->source),
NIPQUAD(iph->daddr), ntohs(tcph->dest),
seq, ack, sack, win, end,
after(end, sender->td_maxend) && before(seq, sender->td_maxend)
? sender->td_maxend : end);
DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
sender->td_scale,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
/* Ignore data over the right edge of the receiver's window. */
if (after(end, sender->td_maxend) &&
before(seq, sender->td_maxend)) {
end = sender->td_maxend;
/* the FIN lies in the trimmed-off part: treat the rest as plain ACK */
if (*index == TCP_FIN_SET)
*index = TCP_ACK_SET;
}
DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
before(end, sender->td_maxend + 1)
|| before(seq, sender->td_maxend + 1),
after(seq, sender->td_end - receiver->td_maxwin - 1)
|| after(end, sender->td_end - receiver->td_maxwin - 1),
before(sack, receiver->td_end + 1),
after(ack, receiver->td_end - MAXACKWINDOW(sender)));
/*
* Accept when either side is still in "loose" mode, or when all four
* bounds (I-IV of the window tracking comment) hold.
*/
if (sender->loose || receiver->loose ||
(before(end, sender->td_maxend + 1) &&
after(seq, sender->td_end - receiver->td_maxwin - 1) &&
before(sack, receiver->td_end + 1) &&
after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
/*
* Take into account window scaling (RFC 1323).
*/
if (!tcph->syn)
win <<= sender->td_scale;
/*
* Update sender data.
*/
swin = win + (sack - ack);
if (sender->td_maxwin < swin)
sender->td_maxwin = swin;
if (after(end, sender->td_end))
sender->td_end = end;
if (after(sack + win, receiver->td_maxend - 1)) {
receiver->td_maxend = sack + win;
if (win == 0)
receiver->td_maxend++;
}
/*
* Check retransmissions.
*/
if (*index == TCP_ACK_SET) {
/* same direction, same seq and same end as the last ACK seen */
if (state->last_dir == dir
&& state->last_seq == seq
&& state->last_end == end)
state->retrans++;
else {
state->last_dir = dir;
state->last_seq = seq;
state->last_end = end;
state->retrans = 0;
}
}
/*
* Close the window of disabled window tracking :-)
*/
if (sender->loose)
sender->loose--;
res = 1;
} else {
/*
* NOTE(review): the third test below uses ack while the acceptance
* test above uses sack, so a packet rejected only because of its
* SACK edge would presumably be reported as "BUG" - confirm.
*/
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: %s ",
before(end, sender->td_maxend + 1) ?
after(seq, sender->td_end - receiver->td_maxwin - 1) ?
before(ack, receiver->td_end + 1) ?
after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
: "ACK is under the lower bound (possibly overly delayed ACK)"
: "ACK is over the upper bound (ACKed data has never seen yet)"
: "SEQ is under the lower bound (retransmitted already ACKed data)"
: "SEQ is over the upper bound (over the window of the receiver)");
/* out-of-window RSTs are never let through, even in liberal mode */
res = ip_ct_tcp_be_liberal && !tcph->rst;
}
DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
"receiver end=%u maxend=%u maxwin=%u\n",
res, sender->td_end, sender->td_maxend, sender->td_maxwin,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
return res;
}
#ifdef CONFIG_IP_NF_NAT_NEEDED
/*
 * Re-sync the tracked end-of-data of direction @dir after NAT has
 * rewritten the packet.
 *
 * @skb:       the mangled packet
 * @conntrack: connection the packet belongs to
 * @dir:       direction of the packet, index into proto.tcp.seen[]
 *
 * Recomputes the segment end from the packet as it is now, moves
 * seen[dir].td_end forward if the new end lies after it, and stores the
 * new end in last_end.  Both writes happen under tcp_lock.
 * Always returns 1.
 */
int ip_conntrack_tcp_update(struct sk_buff *skb,
			    struct ip_conntrack *conntrack,
			    int dir)
{
	struct iphdr *iph = skb->nh.iph;
	struct tcphdr *tcph = (void *)iph + iph->ihl*4;
	struct ip_ct_tcp *tcp_info = &conntrack->proto.tcp;
	__u32 new_end;
#ifdef DEBUGP_VARS
	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
#endif

	new_end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);

	WRITE_LOCK(&tcp_lock);
	/*
	 * We have to worry for the ack in the reply packet only...
	 */
	if (after(new_end, tcp_info->seen[dir].td_end))
		tcp_info->seen[dir].td_end = new_end;
	tcp_info->last_end = new_end;
	WRITE_UNLOCK(&tcp_lock);

	DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		sender->td_end, sender->td_maxend, sender->td_maxwin,
		sender->td_scale,
		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		receiver->td_scale);

	return 1;
}
EXPORT_SYMBOL(ip_conntrack_tcp_update);
#endif
/* Bit values of the TCP flags byte (byte 13 of the TCP header). */
#define TH_FIN	0x01
#define TH_SYN	0x02
#define TH_RST	0x04
#define TH_PUSH	0x08
#define TH_ACK	0x10
#define TH_URG	0x20
#define TH_ECE	0x40
#define TH_CWR	0x80

/*
 * Lookup table of accepted flag combinations, indexed by the flags byte
 * with ECE and CWR masked off (those two are always valid).  An entry of
 * 1 means the combination is accepted; every combination not listed is
 * zero-initialised and therefore rejected.
 */
static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
{
	/* connection setup */
	[TH_SYN]			= 1,
	[TH_SYN|TH_ACK]			= 1,

	/* reset */
	[TH_RST]			= 1,
	[TH_RST|TH_ACK]			= 1,
	[TH_RST|TH_ACK|TH_PUSH]		= 1,

	/* data / plain acknowledgement */
	[TH_ACK]			= 1,
	[TH_ACK|TH_PUSH]		= 1,
	[TH_ACK|TH_URG]			= 1,
	[TH_ACK|TH_URG|TH_PUSH]		= 1,

	/* connection teardown */
	[TH_FIN|TH_ACK]			= 1,
	[TH_FIN|TH_ACK|TH_PUSH]		= 1,
	[TH_FIN|TH_ACK|TH_URG]		= 1,
	[TH_FIN|TH_ACK|TH_URG|TH_PUSH]	= 1,
};
/*
 * Protect conntrack against broken packets.  Code taken from ipt_unclean.c.
 *
 * @skb:     packet to validate
 * @ctinfo:  not used by this function
 * @hooknum: netfilter hook the packet is traversing; the checksum is only
 *           verified on NF_IP_PRE_ROUTING
 *
 * Returns NF_ACCEPT when the packet passes every check and -NF_ACCEPT
 * (after optional logging) when the header is short, truncated or
 * malformed, the checksum is bad, or the flag combination is not listed
 * in tcp_valid_flags.
 */
static int tcp_error(struct sk_buff *skb,
		     enum ip_conntrack_info *ctinfo,
		     unsigned int hooknum)
{
	struct iphdr *iph = skb->nh.iph;
	unsigned int ip_hdrlen = iph->ihl * 4;
	unsigned int tcplen = skb->len - ip_hdrlen;
	unsigned int tcp_hdrlen;
	struct tcphdr tcph;
	u_int8_t tcpflags;

	/* Not even room for the fixed part of the TCP header? */
	if (skb_copy_bits(skb, ip_hdrlen, &tcph, sizeof(tcph)) != 0) {
		if (LOG_INVALID(IPPROTO_TCP))
			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
				      "ip_ct_tcp: short packet ");
		return -NF_ACCEPT;
	}

	/* Data offset smaller than the fixed header, or larger than the
	 * TCP part of the packet. */
	tcp_hdrlen = tcph.doff * 4;
	if (tcp_hdrlen < sizeof(struct tcphdr) || tcplen < tcp_hdrlen) {
		if (LOG_INVALID(IPPROTO_TCP))
			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
				      "ip_ct_tcp: truncated/malformed packet ");
		return -NF_ACCEPT;
	}

	/* Checksum invalid? Ignore.
	 * We skip checking packets on the outgoing path
	 * because the semantic of CHECKSUM_HW is different there
	 * and moreover root might send raw packets.
	 */
	/* FIXME: Source route IP option packets --RR */
	if (hooknum == NF_IP_PRE_ROUTING) {
		unsigned int partial;

		if (skb->ip_summed == CHECKSUM_HW)
			partial = skb->csum;
		else
			partial = skb_checksum(skb, ip_hdrlen, tcplen, 0);

		if (csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen,
				      IPPROTO_TCP, partial)) {
			if (LOG_INVALID(IPPROTO_TCP))
				nf_log_packet(PF_INET, 0, skb, NULL, NULL,
					      "ip_ct_tcp: bad TCP checksum ");
			return -NF_ACCEPT;
		}
	}

	/* Flags live in byte 13 of the header; ECE/CWR never matter here. */
	tcpflags = (((u_int8_t *)&tcph)[13] & ~(TH_ECE|TH_CWR));
	if (!tcp_valid_flags[tcpflags]) {
		if (LOG_INVALID(IPPROTO_TCP))
			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
				      "ip_ct_tcp: invalid TCP flag combination ");
		return -NF_ACCEPT;
	}

	return NF_ACCEPT;
}
/*
 * Copy the complete TCP header, options included, out of @skb into @buff.
 *
 * @skb:  packet to read from
 * @buff: destination; the callers in this file pass a 15*4 byte buffer,
 *        the largest size a 4-bit data offset can describe
 *
 * Done in two steps: the fixed header first, because its doff field is
 * what tells us how many option bytes follow.  The copy results are not
 * checked; tcp_error guarantees for us that the packet is not malformed.
 */
static inline void copy_whole_tcp_header(const struct sk_buff *skb,
					 unsigned char *buff)
{
	const unsigned int tcp_off = skb->nh.iph->ihl * 4;
	const unsigned int fixed_len = sizeof(struct tcphdr);
	struct tcphdr *hdr = (struct tcphdr *)buff;

	/* step 1: fixed part */
	skb_copy_bits(skb, tcp_off, buff, fixed_len);

	/* step 2: options, sized by the data offset just copied */
	skb_copy_bits(skb, tcp_off + fixed_len,
		      buff + fixed_len,
		      hdr->doff * 4 - fixed_len);
}
/* Returns verdict for packet, or -1 for invalid. */
......@@ -172,88 +826,232 @@ static int tcp_packet(struct ip_conntrack *conntrack,
const struct sk_buff *skb,
enum ip_conntrack_info ctinfo)
{
enum tcp_conntrack newconntrack, oldtcpstate;
struct tcphdr tcph;
enum tcp_conntrack new_state, old_state;
enum ip_conntrack_dir dir;
struct iphdr *iph = skb->nh.iph;
unsigned char buff[15 * 4];
struct tcphdr *tcph = (struct tcphdr *)buff;
unsigned long timeout;
unsigned int index;
copy_whole_tcp_header(skb, buff);
WRITE_LOCK(&tcp_lock);
old_state = conntrack->proto.tcp.state;
dir = CTINFO2DIR(ctinfo);
index = get_conntrack_index(tcph);
new_state = tcp_conntracks[dir][index][old_state];
if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
return -1;
if (skb->len < skb->nh.iph->ihl * 4 + tcph.doff * 4)
return -1;
/* If only reply is a RST, we can consider ourselves not to
have an established connection: this is a fairly common
problem case, so we can delete the conntrack
immediately. --RR */
if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && tcph.rst) {
if (del_timer(&conntrack->timeout))
conntrack->timeout.function((unsigned long)conntrack);
switch (new_state) {
case TCP_CONNTRACK_IGNORE:
/* Either SYN in ORIGINAL, or SYN/ACK in REPLY direction. */
if (index == TCP_SYNACK_SET
&& conntrack->proto.tcp.last_index == TCP_SYN_SET
&& conntrack->proto.tcp.last_dir != dir
&& after(ntohl(tcph->ack_seq),
conntrack->proto.tcp.last_seq)) {
/* This SYN/ACK acknowledges a SYN that we earlier
* ignored as invalid. This means that the client and
* the server are both in sync, while the firewall is
* not. We kill this session and block the SYN/ACK so
* that the client cannot but retransmit its SYN and
* thus initiate a clean new session.
*/
WRITE_UNLOCK(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: killing out of sync session ");
if (del_timer(&conntrack->timeout))
conntrack->timeout.function((unsigned long)
conntrack);
return -NF_DROP;
}
conntrack->proto.tcp.last_index = index;
conntrack->proto.tcp.last_dir = dir;
conntrack->proto.tcp.last_seq = ntohl(tcph->seq);
WRITE_UNLOCK(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: invalid SYN (ignored) ");
return NF_ACCEPT;
case TCP_CONNTRACK_MAX:
/* Invalid packet */
DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(tcph),
old_state);
WRITE_UNLOCK(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: invalid state ");
return -NF_ACCEPT;
case TCP_CONNTRACK_SYN_SENT:
if (old_state >= TCP_CONNTRACK_TIME_WAIT) {
/* Attempt to reopen a closed connection.
* Delete this connection and look up again. */
WRITE_UNLOCK(&tcp_lock);
if (del_timer(&conntrack->timeout))
conntrack->timeout.function((unsigned long)
conntrack);
return -NF_REPEAT;
}
break;
case TCP_CONNTRACK_CLOSE:
if (index == TCP_RST_SET
&& test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
&& conntrack->proto.tcp.last_index <= TCP_SYNACK_SET
&& after(ntohl(tcph->ack_seq),
conntrack->proto.tcp.last_seq)) {
/* Ignore RST closing down invalid SYN
we had let through. */
WRITE_UNLOCK(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: invalid RST (ignored) ");
return NF_ACCEPT;
}
/* Just fall through */
default:
/* Keep compilers happy. */
break;
}
WRITE_LOCK(&tcp_lock);
oldtcpstate = conntrack->proto.tcp.state;
newconntrack
= tcp_conntracks
[CTINFO2DIR(ctinfo)]
[get_conntrack_index(&tcph)][oldtcpstate];
/* Invalid */
if (newconntrack == TCP_CONNTRACK_MAX) {
DEBUGP("ip_conntrack_tcp: Invalid dir=%i index=%u conntrack=%u\n",
CTINFO2DIR(ctinfo), get_conntrack_index(&tcph),
conntrack->proto.tcp.state);
if (!tcp_in_window(&conntrack->proto.tcp, dir, &index,
skb, iph, tcph)) {
WRITE_UNLOCK(&tcp_lock);
return -1;
return -NF_ACCEPT;
}
/* From now on we have got in-window packets */
/* If FIN was trimmed off, we don't change state. */
conntrack->proto.tcp.last_index = index;
new_state = tcp_conntracks[dir][index][old_state];
conntrack->proto.tcp.state = newconntrack;
/* Poor man's window tracking: record SYN/ACK for handshake check */
if (oldtcpstate == TCP_CONNTRACK_SYN_SENT
&& CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
&& tcph.syn && tcph.ack) {
conntrack->proto.tcp.handshake_ack
= htonl(ntohl(tcph.seq) + 1);
goto out;
}
DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
"syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
NIPQUAD(iph->saddr), ntohs(tcph->source),
NIPQUAD(iph->daddr), ntohs(tcph->dest),
(tcph->syn ? 1 : 0), (tcph->ack ? 1 : 0),
(tcph->fin ? 1 : 0), (tcph->rst ? 1 : 0),
old_state, new_state);
/* Set ASSURED if we see valid ack in ESTABLISHED after SYN_RECV */
if (oldtcpstate == TCP_CONNTRACK_SYN_RECV
&& CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL
&& tcph.ack && !tcph.syn
&& tcph.ack_seq == conntrack->proto.tcp.handshake_ack)
set_bit(IPS_ASSURED_BIT, &conntrack->status);
conntrack->proto.tcp.state = new_state;
timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
&& *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
WRITE_UNLOCK(&tcp_lock);
out: WRITE_UNLOCK(&tcp_lock);
ip_ct_refresh(conntrack, *tcp_timeouts[newconntrack]);
if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
/* If only reply is a RST, we can consider ourselves not to
have an established connection: this is a fairly common
problem case, so we can delete the conntrack
immediately. --RR */
if (tcph->rst) {
if (del_timer(&conntrack->timeout))
conntrack->timeout.function((unsigned long)
conntrack);
return NF_ACCEPT;
}
} else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
&& (old_state == TCP_CONNTRACK_SYN_RECV
|| old_state == TCP_CONNTRACK_ESTABLISHED)
&& new_state == TCP_CONNTRACK_ESTABLISHED) {
/* Set ASSURED if we see valid ack in ESTABLISHED
after SYN_RECV or a valid answer for a picked up
connection. */
set_bit(IPS_ASSURED_BIT, &conntrack->status);
}
ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static int tcp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
/* Called when a new connection for this protocol found. */
static int tcp_new(struct ip_conntrack *conntrack,
const struct sk_buff *skb)
{
enum tcp_conntrack newconntrack;
struct tcphdr tcph;
if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
return -1;
enum tcp_conntrack new_state;
struct iphdr *iph = skb->nh.iph;
unsigned char buff[15 * 4];
struct tcphdr *tcph = (struct tcphdr *)buff;
#ifdef DEBUGP_VARS
struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
#endif
copy_whole_tcp_header(skb, buff);
/* Don't need lock here: this conntrack not in circulation yet */
newconntrack
= tcp_conntracks[0][get_conntrack_index(&tcph)]
new_state
= tcp_conntracks[0][get_conntrack_index(tcph)]
[TCP_CONNTRACK_NONE];
/* Invalid: delete conntrack */
if (newconntrack == TCP_CONNTRACK_MAX) {
DEBUGP("ip_conntrack_tcp: invalid new deleting.\n");
if (new_state >= TCP_CONNTRACK_MAX) {
DEBUGP("ip_ct_tcp: invalid new deleting.\n");
return 0;
}
if (new_state == TCP_CONNTRACK_SYN_SENT) {
/* SYN packet */
conntrack->proto.tcp.seen[0].td_end =
segment_seq_plus_len(ntohl(tcph->seq), skb->len,
iph, tcph);
conntrack->proto.tcp.seen[0].td_maxwin = ntohs(tcph->window);
if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
conntrack->proto.tcp.seen[0].td_maxwin = 1;
conntrack->proto.tcp.seen[0].td_maxend =
conntrack->proto.tcp.seen[0].td_end;
tcp_options(tcph, &conntrack->proto.tcp.seen[0]);
conntrack->proto.tcp.seen[1].flags = 0;
conntrack->proto.tcp.seen[0].loose =
conntrack->proto.tcp.seen[1].loose = 0;
} else if (ip_ct_tcp_loose == 0) {
/* Don't try to pick up connections. */
return 0;
} else {
/*
* We are in the middle of a connection,
* its history is lost for us.
* Let's try to use the data from the packet.
*/
conntrack->proto.tcp.seen[0].td_end =
segment_seq_plus_len(ntohl(tcph->seq), skb->len,
iph, tcph);
conntrack->proto.tcp.seen[0].td_maxwin = ntohs(tcph->window);
if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
conntrack->proto.tcp.seen[0].td_maxwin = 1;
conntrack->proto.tcp.seen[0].td_maxend =
conntrack->proto.tcp.seen[0].td_end +
conntrack->proto.tcp.seen[0].td_maxwin;
conntrack->proto.tcp.seen[0].td_scale = 0;
/* We assume SACK. Should we assume window scaling too? */
conntrack->proto.tcp.seen[0].flags =
conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
conntrack->proto.tcp.seen[0].loose =
conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
}
conntrack->proto.tcp.seen[1].td_end = 0;
conntrack->proto.tcp.seen[1].td_maxend = 0;
conntrack->proto.tcp.seen[1].td_maxwin = 1;
conntrack->proto.tcp.seen[1].td_scale = 0;
conntrack->proto.tcp.state = newconntrack;
/* tcp_packet will set them */
conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
conntrack->proto.tcp.last_index = TCP_NONE_SET;
DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
sender->td_scale,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
return 1;
}
static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp,
const struct sk_buff *skb)
{
......@@ -261,14 +1059,23 @@ static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp,
struct tcphdr tcph;
unsigned int datalen;
if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
if (skb_copy_bits(skb, iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
return 0;
datalen = skb->len - iph->ihl*4 - tcph.doff*4;
return between(exp->seq, ntohl(tcph.seq), ntohl(tcph.seq) + datalen);
}
/*
 * TCP protocol handler handed to the conntrack core.
 *
 * Defined once, with designated initializers.  The older positional
 * initializer defined the same object a second time and left the error
 * hook NULL, so tcp_error would never have been wired up through it.
 * Members not named here are zero-initialised.
 */
struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
{
	.proto 			= IPPROTO_TCP,
	.name 			= "tcp",
	.pkt_to_tuple 		= tcp_pkt_to_tuple,
	.invert_tuple 		= tcp_invert_tuple,
	.print_tuple 		= tcp_print_tuple,
	.print_conntrack 	= tcp_print_conntrack,
	.packet 		= tcp_packet,
	.new 			= tcp_new,
	.exp_matches_pkt	= tcp_exp_matches_pkt,
	.error			= tcp_error,
};
......@@ -12,6 +12,8 @@
#include <linux/netfilter.h>
#include <linux/in.h>
#include <linux/udp.h>
#include <net/checksum.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
unsigned long ip_ct_udp_timeout = 30*HZ;
......@@ -60,16 +62,17 @@ static unsigned int udp_print_conntrack(char *buffer,
/* Returns verdict for packet, and may modify conntracktype */
static int udp_packet(struct ip_conntrack *conntrack,
const struct sk_buff *skb,
enum ip_conntrack_info conntrackinfo)
enum ip_conntrack_info ctinfo)
{
/* If we've seen traffic both ways, this is some kind of UDP
stream. Extend timeout. */
if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
ip_ct_refresh(conntrack, ip_ct_udp_timeout_stream);
ip_ct_refresh_acct(conntrack, ctinfo, skb,
ip_ct_udp_timeout_stream);
/* Also, more likely to be important, and not a probe */
set_bit(IPS_ASSURED_BIT, &conntrack->status);
} else
ip_ct_refresh(conntrack, ip_ct_udp_timeout);
ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
return NF_ACCEPT;
}
......@@ -80,7 +83,60 @@ static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
return 1;
}
/*
 * NOTE(review): positional initializer for ip_conntrack_protocol_udp.
 * The same object is defined again further down in this file with
 * designated initializers, and that definition also sets
 * .error = udp_error.  This one presumably is a pre-patch leftover -
 * confirm and keep only one definition.
 */
struct ip_conntrack_protocol ip_conntrack_protocol_udp
= { { NULL, NULL }, IPPROTO_UDP, "udp",
udp_pkt_to_tuple, udp_invert_tuple, udp_print_tuple, udp_print_conntrack,
udp_packet, udp_new, NULL, NULL, NULL };
/*
 * Sanity-check a UDP packet before it is handed to connection tracking.
 *
 * @skb:     packet to validate
 * @ctinfo:  not used by this function
 * @hooknum: netfilter hook the packet is traversing; the checksum is only
 *           verified on NF_IP_PRE_ROUTING
 *
 * Returns NF_ACCEPT when the packet is fine (a zero checksum field means
 * "no checksum" and is accepted as is) and -NF_ACCEPT, after optional
 * logging, for short, truncated/malformed or badly checksummed packets.
 */
static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
		     unsigned int hooknum)
{
	struct iphdr *iph = skb->nh.iph;
	unsigned int ip_hdrlen = iph->ihl * 4;
	unsigned int udplen = skb->len - ip_hdrlen;
	unsigned int claimed_len;
	unsigned int partial;
	struct udphdr hdr;

	/* Header is too small? */
	if (skb_copy_bits(skb, ip_hdrlen, &hdr, sizeof(hdr)) != 0) {
		if (LOG_INVALID(IPPROTO_UDP))
			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
				      "ip_ct_udp: short packet ");
		return -NF_ACCEPT;
	}

	/* Truncated/malformed packets: the length field must fit inside
	 * the packet and cover at least the header itself. */
	claimed_len = ntohs(hdr.len);
	if (claimed_len > udplen || claimed_len < sizeof(hdr)) {
		if (LOG_INVALID(IPPROTO_UDP))
			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
				      "ip_ct_udp: truncated/malformed packet ");
		return -NF_ACCEPT;
	}

	/* Packet with no checksum */
	if (!hdr.check)
		return NF_ACCEPT;

	/* Checksum invalid? Ignore.
	 * We skip checking packets on the outgoing path
	 * because the semantic of CHECKSUM_HW is different there
	 * and moreover root might send raw packets.
	 * FIXME: Source route IP option packets --RR */
	if (hooknum != NF_IP_PRE_ROUTING)
		return NF_ACCEPT;

	if (skb->ip_summed == CHECKSUM_HW)
		partial = skb->csum;
	else
		partial = skb_checksum(skb, ip_hdrlen, udplen, 0);

	if (csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP,
			      partial)) {
		if (LOG_INVALID(IPPROTO_UDP))
			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
				      "ip_ct_udp: bad UDP checksum ");
		return -NF_ACCEPT;
	}

	return NF_ACCEPT;
}
/*
 * UDP protocol handler handed to the conntrack core.  Members that are
 * not named here are zero-initialised.
 */
struct ip_conntrack_protocol ip_conntrack_protocol_udp = {
	/* identification */
	.name			= "udp",
	.proto			= IPPROTO_UDP,

	/* tuple handling and printing */
	.pkt_to_tuple		= udp_pkt_to_tuple,
	.invert_tuple		= udp_invert_tuple,
	.print_tuple		= udp_print_tuple,
	.print_conntrack	= udp_print_conntrack,

	/* per-packet hooks */
	.error			= udp_error,
	.new			= udp_new,
	.packet			= udp_packet,
};
......@@ -20,6 +20,8 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/percpu.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
......@@ -43,6 +45,9 @@
MODULE_LICENSE("GPL");
extern atomic_t ip_conntrack_count;
DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
static int kill_proto(const struct ip_conntrack *i, void *data)
{
return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
......@@ -63,128 +68,305 @@ print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple,
return len;
}
#ifdef CONFIG_IP_NF_CT_ACCT
/*
 * Append the packet and byte counters of one direction to the seq_file.
 *
 * @s:       seq_file being filled
 * @counter: accounting data of one direction
 *
 * Returns whatever seq_printf returns; callers treat non-zero as failure.
 *
 * The counters are u_int64_t, which is not 'unsigned long long' on every
 * architecture, so they are cast to match the %llu conversion.
 */
static unsigned int
seq_print_counters(struct seq_file *s, struct ip_conntrack_counter *counter)
{
	return seq_printf(s, "packets=%llu bytes=%llu ",
			  (unsigned long long)counter->packets,
			  (unsigned long long)counter->bytes);
}
#else
/* Accounting compiled out: print nothing and report success. */
#define seq_print_counters(x, y)	0
#endif
/*
 * seq_file start hook for the conntrack table.
 *
 * @s:   seq_file being read
 * @pos: hash bucket to start from
 *
 * Returns a freshly allocated cursor holding the bucket number, NULL when
 * *pos is past the last bucket, or ERR_PTR(-ENOMEM).  ip_conntrack_lock is
 * read-locked on every return path, including the failing ones, because
 * ct_seq_stop unlocks unconditionally.
 */
static void *ct_seq_start(struct seq_file *s, loff_t *pos)
{
	unsigned int *bucket = NULL;
	int in_range = *pos < ip_conntrack_htable_size;

	/*
	 * Allocate before taking the lock: a GFP_KERNEL allocation may
	 * sleep, which must not happen while the conntrack lock is held.
	 */
	if (in_range)
		bucket = kmalloc(sizeof(*bucket), GFP_KERNEL);

	/* strange seq_file api calls stop even if we fail,
	 * thus we need to grab lock since stop unlocks */
	READ_LOCK(&ip_conntrack_lock);

	if (!in_range)
		return NULL;

	if (!bucket)
		return ERR_PTR(-ENOMEM);

	*bucket = *pos;
	return bucket;
}
/*
 * seq_file next hook: advance the cursor to the following hash bucket.
 *
 * @s:   seq_file being read
 * @v:   cursor allocated by ct_seq_start
 * @pos: updated to the new bucket number
 *
 * Returns the same cursor, or NULL after the last bucket; in that case the
 * cursor has been freed here.
 *
 * A stray 'return len;' left over from the removed print_conntrack made
 * the whole body unreachable; it is gone.
 */
static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	unsigned int *bucket = (unsigned int *) v;

	*pos = ++(*bucket);
	if (*pos >= ip_conntrack_htable_size) {
		kfree(v);
		return NULL;
	}
	return bucket;
}
/*
 * seq_file stop hook: release the cursor and drop the lock taken by
 * ct_seq_start.
 *
 * @s: seq_file being read
 * @v: current cursor; NULL once ct_seq_next has run off the end (it freed
 *     the cursor itself), an ERR_PTR when ct_seq_start failed, otherwise
 *     a live allocation
 *
 * seq_file may stop in the middle of the table (for example when its
 * output buffer is full) and then calls start again, which allocates a new
 * cursor.  The live one therefore has to be freed here or it leaks.
 */
static void ct_seq_stop(struct seq_file *s, void *v)
{
	if (v && !IS_ERR(v))
		kfree(v);
	READ_UNLOCK(&ip_conntrack_lock);
}
/* Returns true when finished. */
static inline int
conntrack_iterate(const struct ip_conntrack_tuple_hash *hash,
char *buffer, off_t offset, off_t *upto,
unsigned int *len, unsigned int maxlen)
/* return 0 on success, 1 in case of error */
static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash,
struct seq_file *s)
{
unsigned int newlen;
IP_NF_ASSERT(hash->ctrack);
struct ip_conntrack *conntrack = hash->ctrack;
struct ip_conntrack_protocol *proto;
char buffer[IP_CT_PRINT_BUFLEN];
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
/* Only count originals */
IP_NF_ASSERT(conntrack);
/* we only want to print DIR_ORIGINAL */
if (DIRECTION(hash))
return 0;
if ((*upto)++ < offset)
return 0;
proto = __ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.dst.protonum);
IP_NF_ASSERT(proto);
if (seq_printf(s, "%-8s %u %lu ",
proto->name,
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
timer_pending(&conntrack->timeout)
? (conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
return 1;
proto->print_conntrack(buffer, conntrack);
if (seq_puts(s, buffer))
return 1;
print_tuple(buffer, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
proto);
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
return 1;
newlen = print_conntrack(buffer + *len, hash->ctrack);
if (*len + newlen > maxlen)
if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
if (seq_printf(s, "[UNREPLIED] "))
return 1;
print_tuple(buffer, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
proto);
if (seq_puts(s, buffer))
return 1;
if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
return 1;
if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
if (seq_printf(s, "[ASSURED] "))
return 1;
if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
return 1;
else *len += newlen;
return 0;
}
static int
list_conntracks(char *buffer, char **start, off_t offset, int length)
static int ct_seq_show(struct seq_file *s, void *v)
{
unsigned int i;
unsigned int len = 0;
off_t upto = 0;
struct list_head *e;
unsigned int *bucket = (unsigned int *) v;
READ_LOCK(&ip_conntrack_lock);
/* Traverse hash; print originals then reply. */
for (i = 0; i < ip_conntrack_htable_size; i++) {
if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate,
struct ip_conntrack_tuple_hash *,
buffer, offset, &upto, &len, length))
goto finished;
if (LIST_FIND(&ip_conntrack_hash[*bucket], ct_seq_real_show,
struct ip_conntrack_tuple_hash *, s)) {
/* buffer was filled and unable to print that tuple */
return 1;
}
return 0;
}
/* seq_file iterator callbacks used by ct_open() for the conntrack listing. */
static struct seq_operations ct_seq_ops = {
.start = ct_seq_start,
.next = ct_seq_next,
.stop = ct_seq_stop,
.show = ct_seq_show
};
/* open() handler: attach the ct_seq_ops iterator to the file via seq_open()
 * and return seq_open()'s result unchanged. */
static int ct_open(struct inode *inode, struct file *file)
{
return seq_open(file, &ct_seq_ops);
}
/* Now iterate through expecteds. */
/* File operations installed on the "ip_conntrack" proc entry: opening goes
 * through ct_open(), everything else is the stock seq_file implementation. */
static struct file_operations ct_file_ops = {
.owner = THIS_MODULE,
.open = ct_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
/* expects */
static void *exp_seq_start(struct seq_file *s, loff_t *pos)
{
struct list_head *e = &ip_conntrack_expect_list;
loff_t i;
/* strange seq_file api calls stop even if we fail,
* thus we need to grab lock since stop unlocks */
READ_LOCK(&ip_conntrack_lock);
READ_LOCK(&ip_conntrack_expect_tuple_lock);
list_for_each(e, &ip_conntrack_expect_list) {
unsigned int last_len;
struct ip_conntrack_expect *expect
= (struct ip_conntrack_expect *)e;
if (upto++ < offset) continue;
last_len = len;
len += print_expect(buffer + len, expect);
if (len > length) {
len = last_len;
goto finished_expects;
}
if (list_empty(e))
return NULL;
for (i = 0; i <= *pos; i++) {
e = e->next;
if (e == &ip_conntrack_expect_list)
return NULL;
}
return e;
}
finished_expects:
/*
 * seq_file ->next for the expectation list: step from entry v to the one
 * after it.
 *
 * exp_seq_start() finds its entry by walking *pos + 1 links from the list
 * head, so *pos has to be advanced on every step taken here.  Without that,
 * a dump that needs more than one read() restarts from a stale position and
 * repeats entries.
 *
 * Returns the next list node, or NULL once the walk is back at the list
 * head (end of list).
 */
static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct list_head *e = v;

	++*pos;
	e = e->next;
	if (e == &ip_conntrack_expect_list)
		return NULL;
	return e;
}
/*
 * seq_file ->stop for the expectation list: release the two read locks,
 * the expect-tuple lock first and ip_conntrack_lock last.
 */
static void exp_seq_stop(struct seq_file *s, void *v)
{
	READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
	READ_UNLOCK(&ip_conntrack_lock);
}
/* `start' hack - see fs/proc/generic.c line ~165 */
*start = (char *)((unsigned int)upto - offset);
return len;
/*
 * seq_file ->show for one expectation.
 *
 * Emits: the remaining timeout in seconds (or "- " when the owning helper
 * has no timeout configured), the use count, the protocol number and the
 * expected tuple, terminated by a newline.  Returns the result of the
 * final seq_printf().
 */
static int exp_seq_show(struct seq_file *s, void *v)
{
	char tuple_text[IP_CT_PRINT_BUFLEN];
	struct ip_conntrack_expect *exp = v;
	struct ip_conntrack_protocol *proto;

	if (!exp->expectant->helper->timeout) {
		seq_printf(s, "- ");
	} else {
		unsigned long secs_left = 0;

		/* A timer that is not pending is reported as 0 seconds. */
		if (timer_pending(&exp->timeout))
			secs_left = (exp->timeout.expires - jiffies)/HZ;
		seq_printf(s, "%lu ", secs_left);
	}

	seq_printf(s, "use=%u proto=%u ",
		   atomic_read(&exp->use), exp->tuple.dst.protonum);

	proto = __ip_ct_find_proto(exp->tuple.dst.protonum);
	print_tuple(tuple_text, &exp->tuple, proto);

	return seq_printf(s, "%s\n", tuple_text);
}
/* seq_file iterator callbacks used by exp_open() for the expectation list. */
static struct seq_operations exp_seq_ops = {
.start = exp_seq_start,
.next = exp_seq_next,
.stop = exp_seq_stop,
.show = exp_seq_show
};
/* open() handler: attach the exp_seq_ops iterator to the file via seq_open()
 * and return seq_open()'s result unchanged. */
static int exp_open(struct inode *inode, struct file *file)
{
return seq_open(file, &exp_seq_ops);
}
/* File operations installed on the "ip_conntrack_expect" proc entry: opening
 * goes through exp_open(), the rest is the stock seq_file implementation. */
static struct file_operations exp_file_ops = {
.owner = THIS_MODULE,
.open = exp_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
/*
 * seq_file ->start for the per-CPU statistics: starting at CPU index *pos,
 * find the first possible CPU, store its index back into *pos and return
 * that CPU's ip_conntrack_stat.  Returns NULL when no possible CPU is left
 * below NR_CPUS.
 */
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu = *pos;

	/* Skip over holes in the possible-CPU map. */
	while (cpu < NR_CPUS && !cpu_possible(cpu))
		++cpu;

	if (cpu >= NR_CPUS)
		return NULL;

	*pos = cpu;
	return &per_cpu(ip_conntrack_stat, cpu);
}
/*
 * seq_file ->next for the per-CPU statistics: find the first possible CPU
 * with an index greater than *pos, store its index into *pos and return
 * that CPU's ip_conntrack_stat.  Returns NULL (leaving *pos untouched) when
 * no possible CPU is left below NR_CPUS.
 */
static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu = *pos + 1;

	/* Skip over holes in the possible-CPU map. */
	while (cpu < NR_CPUS && !cpu_possible(cpu))
		++cpu;

	if (cpu >= NR_CPUS)
		return NULL;

	*pos = cpu;
	return &per_cpu(ip_conntrack_stat, cpu);
}
/* seq_file ->stop for the per-CPU statistics: intentionally empty, since
 * ct_cpu_seq_start()/ct_cpu_seq_next() acquire nothing that needs releasing. */
static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
/*
 * seq_file ->show for the per-CPU statistics: print one line of sixteen
 * zero-padded 8-digit hex fields.  The first field is the global conntrack
 * count (ip_conntrack_count, identical on every line); the remaining
 * fifteen are the counters of the ip_conntrack_stat passed in v, in the
 * order listed below.  Always returns 0.
 */
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
struct ip_conntrack_stat *st = v;
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
"%08x %08x %08x %08x %08x %08x %08x %08x \n",
nr_conntracks,
st->searched,
st->found,
st->new,
st->invalid,
st->ignore,
st->delete,
st->delete_list,
st->insert,
st->insert_failed,
st->drop,
st->early_drop,
st->icmp_error,
st->expect_new,
st->expect_create,
st->expect_delete
);
return 0;
}
/* seq_file iterator callbacks used by ct_cpu_seq_open() for the per-CPU
 * conntrack statistics. */
static struct seq_operations ct_cpu_seq_ops = {
.start = ct_cpu_seq_start,
.next = ct_cpu_seq_next,
.stop = ct_cpu_seq_stop,
.show = ct_cpu_seq_show,
};
/* open() handler: attach the ct_cpu_seq_ops iterator to the file via
 * seq_open() and return seq_open()'s result unchanged. */
static int ct_cpu_seq_open(struct inode *inode, struct file *file)
{
return seq_open(file, &ct_cpu_seq_ops);
}
static struct file_operations ct_cpu_seq_fops = {
.owner = THIS_MODULE,
.open = ct_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
static unsigned int ip_confirm(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
......@@ -323,6 +505,10 @@ extern unsigned long ip_ct_tcp_timeout_close_wait;
extern unsigned long ip_ct_tcp_timeout_last_ack;
extern unsigned long ip_ct_tcp_timeout_time_wait;
extern unsigned long ip_ct_tcp_timeout_close;
extern unsigned long ip_ct_tcp_timeout_max_retrans;
extern int ip_ct_tcp_loose;
extern int ip_ct_tcp_be_liberal;
extern int ip_ct_tcp_max_retrans;
/* From ip_conntrack_proto_udp.c */
extern unsigned long ip_ct_udp_timeout;
......@@ -334,6 +520,11 @@ extern unsigned long ip_ct_icmp_timeout;
/* From ip_conntrack_proto_generic.c */
extern unsigned long ip_ct_generic_timeout;
/* Log invalid packets of a given protocol */
unsigned int ip_ct_log_invalid = 0;
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
static struct ctl_table_header *ip_ct_sysctl_header;
static ctl_table ip_ct_sysctl_table[] = {
......@@ -449,6 +640,49 @@ static ctl_table ip_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
.procname = "ip_conntrack_log_invalid",
.data = &ip_ct_log_invalid,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &log_invalid_proto_min,
.extra2 = &log_invalid_proto_max,
},
{
.ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
.procname = "ip_conntrack_tcp_timeout_max_retrans",
.data = &ip_ct_tcp_timeout_max_retrans,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
.procname = "ip_conntrack_tcp_loose",
.data = &ip_ct_tcp_loose,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
.procname = "ip_conntrack_tcp_be_liberal",
.data = &ip_ct_tcp_be_liberal,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
.procname = "ip_conntrack_tcp_max_retrans",
.data = &ip_ct_tcp_max_retrans,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 }
};
......@@ -494,7 +728,7 @@ static ctl_table ip_ct_net_table[] = {
#endif
static int init_or_cleanup(int init)
{
struct proc_dir_entry *proc;
struct proc_dir_entry *proc, *proc_exp, *proc_stat;
int ret = 0;
if (!init) goto cleanup;
......@@ -503,14 +737,24 @@ static int init_or_cleanup(int init)
if (ret < 0)
goto cleanup_nothing;
proc = proc_net_create("ip_conntrack", 0440, list_conntracks);
proc = proc_net_create("ip_conntrack", 0440, NULL);
if (!proc) goto cleanup_init;
proc->owner = THIS_MODULE;
proc->proc_fops = &ct_file_ops;
proc_exp = proc_net_create("ip_conntrack_expect", 0440, NULL);
if (!proc_exp) goto cleanup_proc;
proc_exp->proc_fops = &exp_file_ops;
proc_stat = proc_net_fops_create("ip_conntrack_stat", S_IRUGO,
&ct_cpu_seq_fops);
if (!proc_stat)
goto cleanup_proc_exp;
proc_stat->owner = THIS_MODULE;
ret = nf_register_hook(&ip_conntrack_defrag_ops);
if (ret < 0) {
printk("ip_conntrack: can't register pre-routing defrag hook.\n");
goto cleanup_proc;
goto cleanup_proc_stat;
}
ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
if (ret < 0) {
......@@ -562,6 +806,10 @@ static int init_or_cleanup(int init)
nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
cleanup_defragops:
nf_unregister_hook(&ip_conntrack_defrag_ops);
cleanup_proc_stat:
	proc_net_remove("ip_conntrack_stat");
cleanup_proc_exp:
	/* The name must match the one the entry was created under,
	 * "ip_conntrack_expect", or the proc entry is left behind. */
	proc_net_remove("ip_conntrack_expect");
cleanup_proc:
	proc_net_remove("ip_conntrack");
cleanup_init:
......@@ -638,7 +886,7 @@ EXPORT_SYMBOL(need_ip_conntrack);
EXPORT_SYMBOL(ip_conntrack_helper_register);
EXPORT_SYMBOL(ip_conntrack_helper_unregister);
EXPORT_SYMBOL(ip_ct_selective_cleanup);
EXPORT_SYMBOL(ip_ct_refresh);
EXPORT_SYMBOL(ip_ct_refresh_acct);
EXPORT_SYMBOL(ip_ct_find_proto);
EXPORT_SYMBOL(__ip_ct_find_proto);
EXPORT_SYMBOL(ip_ct_find_helper);
......
......@@ -19,6 +19,7 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
#include <linux/moduleparam.h>
MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
MODULE_DESCRIPTION("tftp connection tracking helper");
......@@ -27,7 +28,7 @@ MODULE_LICENSE("GPL");
#define MAX_PORTS 8
static int ports[MAX_PORTS];
static int ports_c;
MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
module_param_array(ports, int, ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of tftp servers");
#if 0
......@@ -104,10 +105,10 @@ static int __init init(void)
int i, ret;
char *tmpname;
if (!ports[0])
ports[0]=TFTP_PORT;
if (ports_c == 0)
ports[ports_c++] = TFTP_PORT;
for (i = 0 ; (i < MAX_PORTS) && ports[i] ; i++) {
for (i = 0; i < ports_c; i++) {
/* Create helper structure */
memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper));
......@@ -137,7 +138,6 @@ static int __init init(void)
fini();
return(ret);
}
ports_c++;
}
return(0);
}
......
......@@ -31,6 +31,7 @@
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/listhelp.h>
......@@ -144,7 +145,8 @@ check_for_demasq(struct sk_buff **pskb)
switch ((*pskb)->nh.iph->protocol) {
case IPPROTO_ICMP:
/* ICMP errors. */
ct = icmp_error_track(*pskb, &ctinfo, NF_IP_PRE_ROUTING);
protocol->error(*pskb, &ctinfo, NF_IP_PRE_ROUTING);
ct = (struct ip_conntrack *)(*pskb)->nfct->master;
if (ct) {
/* We only do SNAT in the compatibility layer.
So we can manipulate ICMP errors from
......@@ -165,7 +167,8 @@ check_for_demasq(struct sk_buff **pskb)
case IPPROTO_UDP:
IP_NF_ASSERT(((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
if (!get_tuple((*pskb)->nh.iph, *pskb, (*pskb)->nh.iph->ihl*4, &tuple, protocol)) {
if (!ip_ct_get_tuple((*pskb)->nh.iph, *pskb,
(*pskb)->nh.iph->ihl*4, &tuple, protocol)) {
if (net_ratelimit())
printk("ip_fw_compat_masq: Can't get tuple\n");
return NF_ACCEPT;
......
......@@ -49,7 +49,6 @@ static unsigned int ip_nat_htable_size;
static struct list_head *bysource;
static struct list_head *byipsproto;
LIST_HEAD(protos);
LIST_HEAD(helpers);
extern struct ip_nat_protocol unknown_nat_protocol;
......@@ -498,13 +497,6 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
return ret;
}
static inline int
helper_cmp(const struct ip_nat_helper *helper,
const struct ip_conntrack_tuple *tuple)
{
return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask);
}
/* Where to manip the reply packets (will be reverse manip). */
static unsigned int opposite_hook[NF_IP_NUMHOOKS]
= { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING,
......@@ -643,8 +635,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
/* If there's a helper, assign it; based on new tuple. */
if (!conntrack->master)
info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,
&reply);
info->helper = ip_nat_find_helper(&reply);
/* It's done. */
info->initialized |= (1 << HOOK2MANIP(hooknum));
......
......@@ -12,6 +12,7 @@
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/moduleparam.h>
#include <net/tcp.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
......@@ -33,7 +34,7 @@ MODULE_DESCRIPTION("ftp NAT helper");
static int ports[MAX_PORTS];
static int ports_c;
MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
module_param_array(ports, int, ports_c, 0400);
DECLARE_LOCK_EXTERN(ip_ftp_lock);
......@@ -313,10 +314,10 @@ static int __init init(void)
int i, ret = 0;
char *tmpname;
if (ports[0] == 0)
ports[0] = FTP_PORT;
/* No ports supplied: fall back to the standard FTP port.  ports_c must be
 * bumped as well, because the registration loop runs while i < ports_c. */
if (ports_c == 0)
	ports[ports_c++] = FTP_PORT;
for (i = 0; (i < MAX_PORTS) && ports[i]; i++) {
for (i = 0; i < ports_c; i++) {
ftp[i].tuple.dst.protonum = IPPROTO_TCP;
ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
ftp[i].mask.dst.protonum = 0xFFFF;
......@@ -343,7 +344,6 @@ static int __init init(void)
fini();
return ret;
}
ports_c++;
}
return ret;
......
......@@ -47,6 +47,7 @@
#define DUMP_OFFSET(x)
#endif
static LIST_HEAD(helpers);
DECLARE_LOCK(ip_nat_seqofs_lock);
/* Setup TCP sequence correction given this change at this sequence */
......@@ -419,6 +420,18 @@ int ip_nat_helper_register(struct ip_nat_helper *me)
return ret;
}
/*
 * Look up the NAT helper for a tuple: search the `helpers' list with
 * helper_cmp() while holding ip_nat_lock for reading, and return whatever
 * LIST_FIND() yields.
 * NOTE(review): the returned pointer is handed back after the lock has been
 * dropped; presumably helper lifetime is guaranteed elsewhere -- confirm.
 */
struct ip_nat_helper *
ip_nat_find_helper(const struct ip_conntrack_tuple *tuple)
{
struct ip_nat_helper *h;
READ_LOCK(&ip_nat_lock);
h = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, tuple);
READ_UNLOCK(&ip_nat_lock);
return h;
}
static int
kill_helper(const struct ip_conntrack *i, void *helper)
{
......
......@@ -27,6 +27,7 @@
#include <linux/netfilter_ipv4/ip_nat_rule.h>
#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/moduleparam.h>
#if 0
#define DEBUGP printk
......@@ -41,7 +42,7 @@ static int ports_c;
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("IRC (DCC) NAT helper");
MODULE_LICENSE("GPL");
MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i");
module_param_array(ports, int, ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of IRC servers");
/* protects irc part of conntracks */
......@@ -235,11 +236,10 @@ static int __init init(void)
struct ip_nat_helper *hlpr;
char *tmpname;
if (ports[0] == 0) {
ports[0] = IRC_PORT;
}
if (ports_c == 0)
ports[ports_c++] = IRC_PORT;
for (i = 0; (i < MAX_PORTS) && ports[i] != 0; i++) {
for (i = 0; i < ports_c; i++) {
hlpr = &ip_nat_irc_helpers[i];
hlpr->tuple.dst.protonum = IPPROTO_TCP;
hlpr->tuple.src.u.tcp.port = htons(ports[i]);
......@@ -269,7 +269,6 @@ static int __init init(void)
fini();
return 1;
}
ports_c++;
}
return ret;
}
......
......@@ -132,7 +132,8 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
ct = ip_conntrack_get(*pskb, &ctinfo);
/* Connection must be valid and new. */
IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
|| ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
IP_NF_ASSERT(out);
return ip_nat_setup_info(ct, targinfo, hooknum);
......
......@@ -47,6 +47,7 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
......@@ -1252,6 +1253,9 @@ static unsigned int nat_help(struct ip_conntrack *ct,
int dir = CTINFO2DIR(ctinfo);
struct iphdr *iph = (*pskb)->nh.iph;
struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
if (!skb_ip_make_writable(pskb, (*pskb)->len))
return NF_DROP;
spin_lock_bh(&snmp_lock);
......@@ -1357,4 +1361,4 @@ static void __exit fini(void)
module_init(init);
module_exit(fini);
MODULE_PARM(debug, "i");
module_param(debug, bool, 0600);
......@@ -32,6 +32,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
#include <linux/moduleparam.h>
MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
MODULE_DESCRIPTION("tftp NAT helper");
......@@ -41,7 +42,7 @@ MODULE_LICENSE("GPL");
static int ports[MAX_PORTS];
static int ports_c = 0;
MODULE_PARM(ports,"1-" __MODULE_STRING(MAX_PORTS) "i");
module_param_array(ports, int, ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of tftp servers");
#if 0
......@@ -162,10 +163,10 @@ static int __init init(void)
int i, ret = 0;
char *tmpname;
if (!ports[0])
ports[0] = TFTP_PORT;
if (ports_c == 0)
ports[ports_c++] = TFTP_PORT;
for (i = 0 ; (i < MAX_PORTS) && ports[i] ; i++) {
for (i = 0; i < ports_c; i++) {
memset(&tftp[i], 0, sizeof(struct ip_nat_helper));
tftp[i].tuple.dst.protonum = IPPROTO_UDP;
......@@ -194,7 +195,6 @@ static int __init init(void)
fini();
return ret;
}
ports_c++;
}
return ret;
}
......
......@@ -61,6 +61,8 @@ do { \
#endif
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
static DECLARE_MUTEX(ipt_mutex);
/* Must have mutex */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
......
......@@ -28,7 +28,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("iptables syslog logging module");
static unsigned int nflog = 1;
MODULE_PARM(nflog, "i");
module_param(nflog, int, 0400);
MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
#if 0
......
......@@ -92,8 +92,8 @@ masquerade_target(struct sk_buff **pskb,
return NF_ACCEPT;
ct = ip_conntrack_get(*pskb, &ctinfo);
IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW
|| ctinfo == IP_CT_RELATED));
IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
|| ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
mr = targinfo;
......
......@@ -34,8 +34,8 @@
* by that factor.
*
* flushtimeout:
* Specify, after how many clock ticks (intel: 100 per second) the queue
* should be flushed even if it is not full yet.
* Specify, after how many hundredths of a second the queue should be
* flushed even if it is not full yet.
*
* ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp
*/
......@@ -50,6 +50,7 @@
#include <linux/netlink.h>
#include <linux/netdevice.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_ULOG.h>
......@@ -74,15 +75,15 @@ MODULE_DESCRIPTION("iptables userspace logging module");
#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
static unsigned int nlbufsiz = 4096;
MODULE_PARM(nlbufsiz, "i");
module_param(nlbufsiz, uint, 0600); /* FIXME: Check size < 128k --RR */
MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
static unsigned int flushtimeout = 10 * HZ;
MODULE_PARM(flushtimeout, "i");
MODULE_PARM_DESC(flushtimeout, "buffer flush timeout");
static unsigned int flushtimeout = 10;
module_param(flushtimeout, int, 0600);
MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
static unsigned int nflog = 1;
MODULE_PARM(nflog, "i");
module_param(nflog, int, 0400);
MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
/* global data structures */
......@@ -97,7 +98,6 @@ typedef struct {
static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
static struct sock *nflognl; /* our socket */
static size_t qlen; /* current length of multipart-nlmsg */
DECLARE_LOCK(ulog_lock); /* spinlock */
/* send one ulog_buff_t to userspace */
......@@ -116,7 +116,7 @@ static void ulog_send(unsigned int nlgroupnum)
NETLINK_CB(ub->skb).dst_groups = (1 << nlgroupnum);
DEBUGP("ipt_ULOG: throwing %d packets to netlink mask %u\n",
ub->qlen, nlgroup);
ub->qlen, nlgroupnum);
netlink_broadcast(nflognl, ub->skb, 0, (1 << nlgroupnum), GFP_ATOMIC);
ub->qlen = 0;
......@@ -126,7 +126,7 @@ static void ulog_send(unsigned int nlgroupnum)
}
/* timer function to flush queue in ULOG_FLUSH_INTERVAL time */
/* timer function to flush queue in flushtimeout time */
static void ulog_timer(unsigned long data)
{
DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n");
......@@ -261,20 +261,21 @@ static void ipt_ulog_packet(unsigned int hooknum,
ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
}
/* if threshold is reached, send message to userspace */
if (qlen >= loginfo->qthreshold) {
if (loginfo->qthreshold > 1)
nlh->nlmsg_type = NLMSG_DONE;
}
ub->lastnlh = nlh;
/* if timer isn't already running, start it */
if (!timer_pending(&ub->timer)) {
ub->timer.expires = jiffies + flushtimeout;
ub->timer.expires = jiffies + flushtimeout * HZ / 100;
add_timer(&ub->timer);
}
/* if threshold is reached, send message to userspace */
if (ub->qlen >= loginfo->qthreshold) {
if (loginfo->qthreshold > 1)
nlh->nlmsg_type = NLMSG_DONE;
ulog_send(groupnum);
}
UNLOCK_BH(&ulog_lock);
return;
......
......@@ -15,6 +15,7 @@
#include <linux/ctype.h>
#include <linux/ip.h>
#include <linux/vmalloc.h>
#include <linux/moduleparam.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_recent.h>
......@@ -37,12 +38,12 @@ KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>. htt
MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>");
MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER);
MODULE_LICENSE("GPL");
MODULE_PARM(ip_list_tot,"i");
MODULE_PARM(ip_pkt_list_tot,"i");
MODULE_PARM(ip_list_hash_size,"i");
MODULE_PARM(ip_list_perms,"i");
module_param(ip_list_tot, int, 0400);
module_param(ip_pkt_list_tot, int, 0400);
module_param(ip_list_hash_size, int, 0400);
module_param(ip_list_perms, int, 0400);
#ifdef DEBUG
MODULE_PARM(debug,"i");
module_param(debug, int, 0600);
MODULE_PARM_DESC(debug,"debugging level, defaults to 1");
#endif
MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list");
......
#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/ip.h>
#include <linux/sctp.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_sctp.h>
#ifdef DEBUG_SCTP
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif
/*
 * Evaluate one optional match criterion.
 * True when `option' is not selected in `flag' (criterion not requested; in
 * that case `cond' is not evaluated at all), otherwise true when `cond'
 * differs from "option is set in invflag" -- i.e. `cond' must hold for a
 * normal test and must fail for an inverted one.
 */
#define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
|| (!!((invflag) & (option)) ^ (cond)))
/*
 * Check a chunk's flag byte against the per-chunk-type flag rules.
 *
 * The first rule whose chunktype equals `chunktype' decides: the result is
 * whether `chunkflags', masked with the rule's flag_mask, equals the rule's
 * flag.  A chunk type with no rule at all matches (returns 1).
 */
static int
match_flags(const struct ipt_sctp_flag_info *flag_info,
	    const int flag_count,
	    u_int8_t chunktype,
	    u_int8_t chunkflags)
{
	int idx = 0;

	while (idx < flag_count) {
		const struct ipt_sctp_flag_info *rule = &flag_info[idx++];

		if (rule->chunktype != chunktype)
			continue;

		return (chunkflags & rule->flag_mask) == rule->flag;
	}

	/* No rule for this chunk type: nothing to object to. */
	return 1;
}
/*
 * Walk the chunks of an SCTP packet and evaluate the chunk-type criterion.
 *
 * skb              packet; chunks start after the IP and SCTP common headers
 * chunkmap         bitmap of the chunk types named in the rule
 * chunk_match_type SCTP_CHUNK_MATCH_ANY  - one listed chunk (with acceptable
 *                                          flags) is enough
 *                  SCTP_CHUNK_MATCH_ALL  - every listed chunk type must occur
 *                  SCTP_CHUNK_MATCH_ONLY - no chunk outside the list (or with
 *                                          unacceptable flags) may occur
 * flag_info/flag_count  per-chunk-type flag rules, see match_flags()
 * hotdrop          set to 1 when the packet is malformed
 *
 * Returns 1 on match, 0 otherwise.
 */
static int
match_packet(const struct sk_buff *skb,
	     const u_int32_t *chunkmap,
	     int chunk_match_type,
	     const struct ipt_sctp_flag_info *flag_info,
	     const int flag_count,
	     int *hotdrop)
{
	int offset;
	u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
	sctp_chunkhdr_t sch;

#ifdef DEBUG_SCTP
	int i = 0;
#endif

	/* MATCH_ALL ticks chunk types off a private copy of the map. */
	if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) {
		SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap);
	}

	offset = skb->nh.iph->ihl * 4 + sizeof (sctp_sctphdr_t);
	do {
		/*
		 * A truncated chunk header is invalid.  So is a chunk length
		 * of zero: it would never advance `offset' and the loop
		 * would spin forever on a crafted packet.
		 */
		if (skb_copy_bits(skb, offset, &sch, sizeof(sch)) < 0
		    || sch.length == 0) {
			duprintf("Dropping invalid SCTP packet.\n");
			*hotdrop = 1;
			return 0;
		}

		duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n",
				++i, offset, sch.type, ntohs(sch.length), sch.flags);

		/* Chunks are padded to a multiple of four bytes. */
		offset += (ntohs(sch.length) + 3) & ~3;

		duprintf("skb->len: %d\toffset: %d\n", skb->len, offset);

		if (SCTP_CHUNKMAP_IS_SET(chunkmap, sch.type)) {
			switch (chunk_match_type) {
			case SCTP_CHUNK_MATCH_ANY:
				if (match_flags(flag_info, flag_count,
					sch.type, sch.flags)) {
					return 1;
				}
				break;

			case SCTP_CHUNK_MATCH_ALL:
				if (match_flags(flag_info, flag_count,
					sch.type, sch.flags)) {
					SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch.type);
				}
				break;

			case SCTP_CHUNK_MATCH_ONLY:
				if (!match_flags(flag_info, flag_count,
					sch.type, sch.flags)) {
					return 0;
				}
				break;
			}
		} else {
			switch (chunk_match_type) {
			case SCTP_CHUNK_MATCH_ONLY:
				return 0;
			}
		}
	} while (offset < skb->len);

	switch (chunk_match_type) {
	case SCTP_CHUNK_MATCH_ALL:
		/*
		 * Test the working copy, not the rule's own map: only the
		 * copy has had the chunk types seen in this packet removed.
		 */
		return SCTP_CHUNKMAP_IS_CLEAR(chunkmapcopy);
	case SCTP_CHUNK_MATCH_ANY:
		return 0;
	case SCTP_CHUNK_MATCH_ONLY:
		return 1;
	}

	/* This will never be reached, but required to stop compiler whine */
	return 0;
}
/*
 * iptables match callback for the "sctp" match.
 *
 * Non-first fragments (offset != 0) never match.  A packet too short to
 * hold the SCTP common header sets *hotdrop and does not match.  Otherwise
 * the packet matches when each criterion selected in info->flags (source
 * port range, destination port range, chunk types) holds, with the sense
 * of a criterion reversed when it is also set in info->invflags.
 */
static int
match(const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      const void *matchinfo,
      int offset,
      int *hotdrop)
{
	const struct ipt_sctp_info *info;
	sctp_sctphdr_t sh;

	info = (const struct ipt_sctp_info *)matchinfo;

	if (offset) {
		duprintf("Dropping non-first fragment.. FIXME\n");
		return 0;
	}

	if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &sh, sizeof(sh)) < 0) {
		/* This is the SCTP matcher; the message used to say "TCP". */
		duprintf("Dropping evil SCTP offset=0 tinygram.\n");
		*hotdrop = 1;
		return 0;
	}
	duprintf("spt: %d\tdpt: %d\n", ntohs(sh.source), ntohs(sh.dest));

	return  SCCHECK(((ntohs(sh.source) >= info->spts[0])
			&& (ntohs(sh.source) <= info->spts[1])),
			IPT_SCTP_SRC_PORTS, info->flags, info->invflags)
		&& SCCHECK(((ntohs(sh.dest) >= info->dpts[0])
			&& (ntohs(sh.dest) <= info->dpts[1])),
			IPT_SCTP_DEST_PORTS, info->flags, info->invflags)
		&& SCCHECK(match_packet(skb, info->chunkmap, info->chunk_match_type,
					info->flag_info, info->flag_count,
					hotdrop),
			   IPT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
}
/*
 * Validate a rule that uses the "sctp" match.  Returns 1 when the rule is
 * acceptable and 0 otherwise.  A rule is acceptable when:
 *   - it is restricted to IPPROTO_SCTP, and not via an inverted protocol test;
 *   - the match data has exactly the expected (aligned) size;
 *   - flags and invflags contain only valid bits, and invflags only inverts
 *     criteria that are actually selected in flags;
 *   - if chunk types are to be matched, chunk_match_type has at least one of
 *     the ALL/ANY/ONLY bits set.
 */
static int
checkentry(const char *tablename,
	   const struct ipt_ip *ip,
	   void *matchinfo,
	   unsigned int matchsize,
	   unsigned int hook_mask)
{
	const struct ipt_sctp_info *info = matchinfo;

	if (ip->proto != IPPROTO_SCTP)
		return 0;
	if (ip->invflags & IPT_INV_PROTO)
		return 0;
	if (matchsize != IPT_ALIGN(sizeof(struct ipt_sctp_info)))
		return 0;

	if (info->flags & ~IPT_SCTP_VALID_FLAGS)
		return 0;
	if (info->invflags & ~IPT_SCTP_VALID_FLAGS)
		return 0;
	if (info->invflags & ~info->flags)
		return 0;

	/* The match mode only matters when chunk types are being matched. */
	if (!(info->flags & IPT_SCTP_CHUNK_TYPES))
		return 1;

	return (info->chunk_match_type
		& (SCTP_CHUNK_MATCH_ALL
		   | SCTP_CHUNK_MATCH_ANY
		   | SCTP_CHUNK_MATCH_ONLY)) != 0;
}
/* Descriptor for the "sctp" match: match() is the per-packet callback and
 * checkentry() the rule validator; no destroy hook is needed. */
static struct ipt_match sctp_match =
{
.list = { NULL, NULL},
.name = "sctp",
.match = &match,
.checkentry = &checkentry,
.destroy = NULL,
.me = THIS_MODULE
};
/* Module init: register the "sctp" match and return the registration result. */
static int __init init(void)
{
return ipt_register_match(&sctp_match);
}
/* Module exit: unregister the "sctp" match registered by init(). */
static void __exit fini(void)
{
ipt_unregister_match(&sctp_match);
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kiran Kumar Immidi");
MODULE_DESCRIPTION("Match for SCTP protocol packets");
......@@ -11,6 +11,7 @@
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter_ipv4/ip_tables.h>
MODULE_LICENSE("GPL");
......@@ -155,7 +156,7 @@ static struct nf_hook_ops ipt_ops[] = {
/* Default to forward because I got too much mail already. */
static int forward = NF_ACCEPT;
MODULE_PARM(forward, "i");
module_param(forward, bool, 0000);
static int __init init(void)
{
......
......@@ -66,6 +66,7 @@ do { \
#endif
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
static DECLARE_MUTEX(ip6t_mutex);
/* Must have mutex */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
......
......@@ -11,6 +11,7 @@
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/spinlock.h>
......@@ -26,7 +27,7 @@ MODULE_DESCRIPTION("IP6 tables LOG target module");
MODULE_LICENSE("GPL");
static unsigned int nflog = 1;
MODULE_PARM(nflog, "i");
module_param(nflog, int, 0400);
MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
struct in_device;
......
......@@ -10,6 +10,7 @@
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
MODULE_LICENSE("GPL");
......@@ -156,7 +157,7 @@ static struct nf_hook_ops ip6t_ops[] = {
/* Default to forward because I got too much mail already. */
static int forward = NF_ACCEPT;
MODULE_PARM(forward, "i");
module_param(forward, bool, 0000);
static int __init init(void)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment