Commit 2a526ac9 authored by Rusty Russell, committed by David S. Miller

[NETFILTER]: Simplify expect handling

Now we've changed expect handling, we can simplify it significantly.

1) struct ip_conntrack_expect only exists until the connection
   matching it is created.  Now NAT is done directly at the time the
   expectation is matched, we don't need to keep this information
   around.

2) The term 'master' is used everywhere to mean the connection that
   expected this connection.  The 'master' field in the new connection
   points straight to the master connection, and holds a reference.

3) There is no direct link from the connection to the expectations it
   has created: we walk the global list to find them if we need to
   clean them up.  Each expectation holds a reference.

4) The ip_conntrack_expect_tuple_lock is now a proper subset of
   ip_conntrack_lock, so we can eliminate it.

5) Remove flags from helper: the policy of evicting the oldest
   expectation seems to be appropriate for everyone.

6) ip_conntrack_expect_find_get() and ip_conntrack_expect_put() are no
   longer required.

7) Remove reference count from expectations, and don't free when we
   fail ip_conntrack_expect_related(): have user call
   ip_conntrack_expect_free().
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 55d349b2
......@@ -102,33 +102,19 @@ struct ip_conntrack_expect
/* Internal linked list (global expectation list) */
struct list_head list;
/* reference count */
atomic_t use;
/* We expect this tuple, with the following mask */
struct ip_conntrack_tuple tuple, mask;
/* expectation list for this master */
struct list_head expected_list;
/* Function to call after setup and insertion */
void (*expectfn)(struct ip_conntrack *new,
struct ip_conntrack_expect *this);
/* The conntrack of the master connection */
struct ip_conntrack *expectant;
/* The conntrack of the sibling connection, set after
* expectation arrived */
struct ip_conntrack *sibling;
/* Tuple saved for conntrack */
struct ip_conntrack_tuple ct_tuple;
struct ip_conntrack *master;
/* Timer function; deletes the expectation. */
struct timer_list timeout;
/* Data filled out by the conntrack helpers follow: */
/* We expect this tuple, with the following mask */
struct ip_conntrack_tuple tuple, mask;
/* Function to call after setup and insertion */
void (*expectfn)(struct ip_conntrack *new);
#ifdef CONFIG_IP_NF_NAT_NEEDED
/* This is the original per-proto part, used to map the
* expected connection the way the recipient expects. */
......@@ -136,8 +122,6 @@ struct ip_conntrack_expect
/* Direction relative to the master connection. */
enum ip_conntrack_dir dir;
#endif
union ip_conntrack_expect_proto proto;
};
struct ip_conntrack_counter
......@@ -164,17 +148,12 @@ struct ip_conntrack
/* Accounting Information (same cache line as other written members) */
struct ip_conntrack_counter counters[IP_CT_DIR_MAX];
#endif
/* If we're expecting another related connection, this will be
in expected linked list */
struct list_head sibling_list;
/* If we were expected by an expectation, this will be it */
struct ip_conntrack *master;
/* Current number of expected connections */
unsigned int expecting;
/* If we were expected by an expectation, this will be it */
struct ip_conntrack_expect *master;
/* Helper, if any. */
struct ip_conntrack_helper *helper;
......@@ -203,7 +182,7 @@ struct ip_conntrack
};
/* get master conntrack via master expectation */
#define master_ct(conntr) (conntr->master ? conntr->master->expectant : NULL)
#define master_ct(conntr) (conntr->master)
/* Alter reply tuple (maybe alter helper). */
extern void
......@@ -227,13 +206,6 @@ ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
/* decrement reference count on a conntrack */
extern inline void ip_conntrack_put(struct ip_conntrack *ct);
/* find unconfirmed expectation based on tuple */
struct ip_conntrack_expect *
ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple);
/* decrement reference count on an expectation */
void ip_conntrack_expect_put(struct ip_conntrack_expect *exp);
/* call to create an explicit dependency on ip_conntrack. */
extern void need_ip_conntrack(void);
......
......@@ -4,7 +4,6 @@
struct ip_conntrack_expect;
extern unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......
......@@ -48,6 +48,5 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb)
extern struct list_head *ip_conntrack_hash;
extern struct list_head ip_conntrack_expect_list;
DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
DECLARE_RWLOCK_EXTERN(ip_conntrack_expect_tuple_lock);
#endif /* _IP_CONNTRACK_CORE_H */
......@@ -34,7 +34,6 @@ struct ip_conntrack_expect;
/* For NAT to hook in when we find a packet which describes what other
* connection we should expect. */
extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
enum ip_ct_ftp_type type,
unsigned int matchoff,
......
......@@ -5,15 +5,11 @@
struct module;
/* Reuse expectation when max_expected reached */
#define IP_CT_HELPER_F_REUSE_EXPECT 0x01
struct ip_conntrack_helper
{
struct list_head list; /* Internal use. */
const char *name; /* name of the module */
unsigned char flags; /* Flags (see above) */
struct module *me; /* pointer to self */
unsigned int max_expected; /* Maximum number of concurrent
* expected connections */
......@@ -39,9 +35,10 @@ extern struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_t
/* Allocate space for an expectation: this is mandatory before calling
ip_conntrack_expect_related. */
extern struct ip_conntrack_expect *ip_conntrack_expect_alloc(void);
extern void ip_conntrack_expect_free(struct ip_conntrack_expect *exp);
/* Add an expected connection: can have more than one per connection */
extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp,
struct ip_conntrack *related_to);
extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp);
extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
#endif /*_IP_CONNTRACK_HELPER_H*/
......@@ -20,7 +20,6 @@ struct ip_ct_irc_master {
#ifdef __KERNEL__
extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......
......@@ -14,7 +14,6 @@ struct tftphdr {
#define TFTP_OPCODE_ERROR 5
unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
struct ip_conntrack_expect *exp);
......
......@@ -28,5 +28,6 @@ extern int ip_nat_seq_adjust(struct sk_buff **pskb,
/* Setup NAT on this expected conntrack so it follows master, but goes
* to port ct->master->saved_proto. */
extern void ip_nat_follow_master(struct ip_conntrack *ct);
extern void ip_nat_follow_master(struct ip_conntrack *ct,
struct ip_conntrack_expect *this);
#endif
......@@ -45,7 +45,6 @@ static char amanda_buffer[65536];
static DECLARE_LOCK(amanda_buffer_lock);
unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......@@ -110,6 +109,7 @@ static int help(struct sk_buff **pskb,
}
exp->expectfn = NULL;
exp->master = ct;
exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
exp->tuple.src.u.tcp.port = 0;
......@@ -124,12 +124,14 @@ static int help(struct sk_buff **pskb,
exp->mask.dst.u.tcp.port = 0xFFFF;
if (ip_nat_amanda_hook)
ret = ip_nat_amanda_hook(pskb, ct, ctinfo,
ret = ip_nat_amanda_hook(pskb, ctinfo,
tmp - amanda_buffer,
len, exp);
else if (ip_conntrack_expect_related(exp, ct) != 0)
else if (ip_conntrack_expect_related(exp) != 0) {
ip_conntrack_expect_free(exp);
ret = NF_DROP;
}
}
out:
UNLOCK_BH(&amanda_buffer_lock);
......
......@@ -58,7 +58,6 @@
#endif
DECLARE_RWLOCK(ip_conntrack_lock);
DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
/* ip_conntrack_standalone needs this */
atomic_t ip_conntrack_count = ATOMIC_INIT(0);
......@@ -136,129 +135,70 @@ ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
/* ip_conntrack_expect helper functions */
/* Compare tuple parts depending on mask. */
static inline int expect_cmp(const struct ip_conntrack_expect *i,
const struct ip_conntrack_tuple *tuple)
{
MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
}
static void
destroy_expect(struct ip_conntrack_expect *exp)
static void destroy_expect(struct ip_conntrack_expect *exp)
{
DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
IP_NF_ASSERT(atomic_read(&exp->use) == 0);
ip_conntrack_put(exp->master);
IP_NF_ASSERT(!timer_pending(&exp->timeout));
kmem_cache_free(ip_conntrack_expect_cachep, exp);
CONNTRACK_STAT_INC(expect_delete);
}
inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
{
IP_NF_ASSERT(exp);
if (atomic_dec_and_test(&exp->use)) {
/* usage count dropped to zero */
destroy_expect(exp);
}
}
static inline struct ip_conntrack_expect *
__ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
{
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
struct ip_conntrack_expect *, tuple);
}
/* Find a expectation corresponding to a tuple. */
struct ip_conntrack_expect *
ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
static void unlink_expect(struct ip_conntrack_expect *exp)
{
struct ip_conntrack_expect *exp;
READ_LOCK(&ip_conntrack_lock);
READ_LOCK(&ip_conntrack_expect_tuple_lock);
exp = __ip_ct_expect_find(tuple);
if (exp)
atomic_inc(&exp->use);
READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
READ_UNLOCK(&ip_conntrack_lock);
return exp;
MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
list_del(&exp->list);
/* Logically in destroy_expect, but we hold the lock here. */
exp->master->expecting--;
}
/* remove one specific expectation from all lists and drop refcount,
* does _NOT_ delete the timer. */
static void __unexpect_related(struct ip_conntrack_expect *expect)
static void expectation_timed_out(unsigned long ul_expect)
{
DEBUGP("unexpect_related(%p)\n", expect);
MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
struct ip_conntrack_expect *exp = (void *)ul_expect;
/* we're not allowed to unexpect a confirmed expectation! */
IP_NF_ASSERT(!expect->sibling);
/* delete from global and local lists */
list_del(&expect->list);
list_del(&expect->expected_list);
/* decrement expect-count of master conntrack */
if (expect->expectant)
expect->expectant->expecting--;
ip_conntrack_expect_put(expect);
WRITE_LOCK(&ip_conntrack_lock);
unlink_expect(exp);
WRITE_UNLOCK(&ip_conntrack_lock);
destroy_expect(exp);
}
/* remove one specific expecatation from all lists, drop refcount
* and expire timer.
* This function can _NOT_ be called for confirmed expects! */
static void unexpect_related(struct ip_conntrack_expect *expect)
/* If an expectation for this connection is found, it gets deleted from
 * the global list and returned. */
static struct ip_conntrack_expect *
find_expectation(const struct ip_conntrack_tuple *tuple)
{
IP_NF_ASSERT(expect->expectant);
IP_NF_ASSERT(expect->expectant->helper);
/* if we are supposed to have a timer, but we can't delete
* it: race condition. __unexpect_related will
* be calledd by timeout function */
if (expect->expectant->helper->timeout
&& !del_timer(&expect->timeout))
return;
struct ip_conntrack_expect *i;
__unexpect_related(expect);
list_for_each_entry(i, &ip_conntrack_expect_list, list) {
/* If master is not in hash table yet (ie. packet hasn't left
this machine yet), how can other end know about expected?
Hence these are not the droids you are looking for (if
master ct never got confirmed, we'd hold a reference to it
and weird things would happen to future packets). */
if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
&& is_confirmed(i->master)
&& (!i->timeout.function || del_timer(&i->timeout))) {
unlink_expect(i);
return i;
}
}
return NULL;
}
/* delete all unconfirmed expectations for this conntrack */
static void remove_expectations(struct ip_conntrack *ct, int drop_refcount)
/* delete all expectations for this conntrack */
static void remove_expectations(struct ip_conntrack *ct)
{
struct list_head *exp_entry, *next;
struct ip_conntrack_expect *exp;
struct ip_conntrack_expect *i, *tmp;
DEBUGP("remove_expectations(%p)\n", ct);
list_for_each_safe(exp_entry, next, &ct->sibling_list) {
exp = list_entry(exp_entry, struct ip_conntrack_expect,
expected_list);
/* Optimization: most connection never expect any others. */
if (ct->expecting == 0)
return;
/* we skip established expectations, as we want to delete
* the un-established ones only */
if (exp->sibling) {
DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
if (drop_refcount) {
/* Indicate that this expectations parent is dead */
ip_conntrack_put(exp->expectant);
exp->expectant = NULL;
list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
if (i->master == ct
&& (!i->timeout.function || del_timer(&i->timeout))) {
unlink_expect(i);
destroy_expect(i);
}
continue;
}
IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
IP_NF_ASSERT(exp->expectant == ct);
/* delete expectation from global and private lists */
unexpect_related(exp);
}
}
......@@ -275,14 +215,14 @@ clean_from_lists(struct ip_conntrack *ct)
LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
/* Destroy all un-established, pending expectations */
remove_expectations(ct, 1);
/* Destroy all pending expectations */
remove_expectations(ct);
}
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
struct ip_conntrack_protocol *proto;
DEBUGP("destroy_conntrack(%p)\n", ct);
......@@ -304,8 +244,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
* except TFTP can create an expectation on the first packet,
* before connection is in the list, so we need to clean here,
* too. */
if (ct->expecting)
remove_expectations(ct, 1);
remove_expectations(ct);
/* We overload first tuple to link into unconfirmed list. */
if (!is_confirmed(ct)) {
......@@ -313,21 +252,11 @@ destroy_conntrack(struct nf_conntrack *nfct)
list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
}
/* Delete our master expectation */
if (ct->master) {
if (ct->master->expectant) {
/* can't call __unexpect_related here,
* since it would screw up expect_list */
list_del(&ct->master->expected_list);
master = ct->master->expectant;
}
kmem_cache_free(ip_conntrack_expect_cachep, ct->master);
}
CONNTRACK_STAT_INC(delete);
WRITE_UNLOCK(&ip_conntrack_lock);
if (master)
ip_conntrack_put(master);
if (ct->master)
ip_conntrack_put(ct->master);
DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
kmem_cache_free(ip_conntrack_cachep, ct);
......@@ -529,7 +458,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
struct ip_conntrack *conntrack;
struct ip_conntrack_tuple repl_tuple;
size_t hash;
struct ip_conntrack_expect *expected;
struct ip_conntrack_expect *exp;
if (!ip_conntrack_hash_rnd_initted) {
get_random_bytes(&ip_conntrack_hash_rnd, 4);
......@@ -577,73 +506,39 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
conntrack->timeout.data = (unsigned long)conntrack;
conntrack->timeout.function = death_by_timeout;
INIT_LIST_HEAD(&conntrack->sibling_list);
WRITE_LOCK(&ip_conntrack_lock);
/* Need finding and deleting of expected ONLY if we win race */
READ_LOCK(&ip_conntrack_expect_tuple_lock);
expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
struct ip_conntrack_expect *, tuple);
READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
if (expected) {
/* If master is not in hash table yet (ie. packet hasn't left
this machine yet), how can other end know about expected?
Hence these are not the droids you are looking for (if
master ct never got confirmed, we'd hold a reference to it
and weird things would happen to future packets). */
if (!is_confirmed(expected->expectant)) {
conntrack->helper = ip_ct_find_helper(&repl_tuple);
goto end;
}
/* Expectation is dying... */
if (expected->expectant->helper->timeout
&& !del_timer(&expected->timeout))
goto end;
exp = find_expectation(tuple);
if (exp) {
DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
conntrack, expected);
conntrack, exp);
/* Welcome, Mr. Bond. We've been expecting you... */
IP_NF_ASSERT(expected->expectant);
__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
conntrack->master = expected;
expected->sibling = conntrack;
conntrack->master = exp->master;
#if CONFIG_IP_NF_CONNTRACK_MARK
conntrack->mark = expected->expectant->mark;
conntrack->mark = exp->master->mark;
#endif
LIST_DELETE(&ip_conntrack_expect_list, expected);
expected->expectant->expecting--;
nf_conntrack_get(&master_ct(conntrack)->ct_general);
/* this is a braindead... --pablo */
atomic_inc(&ip_conntrack_count);
/* Overload tuple linked list to put us in unconfirmed list. */
list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list,
&unconfirmed);
WRITE_UNLOCK(&ip_conntrack_lock);
if (expected->expectfn)
expected->expectfn(conntrack);
nf_conntrack_get(&conntrack->master->ct_general);
CONNTRACK_STAT_INC(expect_new);
goto ret;
} else {
conntrack->helper = ip_ct_find_helper(&repl_tuple);
CONNTRACK_STAT_INC(new);
}
end:
/* Overload tuple linked list to put us in unconfirmed list. */
list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
atomic_inc(&ip_conntrack_count);
WRITE_UNLOCK(&ip_conntrack_lock);
ret: return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
if (exp) {
if (exp->expectfn)
exp->expectfn(conntrack, exp);
destroy_expect(exp);
}
return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
......@@ -795,55 +690,50 @@ int invert_tuplepr(struct ip_conntrack_tuple *inverse,
ip_ct_find_proto(orig->dst.protonum));
}
static inline int resent_expect(const struct ip_conntrack_expect *i,
const struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_tuple *mask)
{
DEBUGP("resent_expect\n");
DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple);
DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple);
DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
|| (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
&& ip_ct_tuple_equal(&i->mask, mask));
}
/* Would two expected things clash? */
static inline int expect_clash(const struct ip_conntrack_expect *i,
const struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_tuple *mask)
static inline int expect_clash(const struct ip_conntrack_expect *a,
const struct ip_conntrack_expect *b)
{
/* Part covered by intersection of masks must be unequal,
otherwise they clash */
struct ip_conntrack_tuple intersect_mask
= { { i->mask.src.ip & mask->src.ip,
{ i->mask.src.u.all & mask->src.u.all } },
{ i->mask.dst.ip & mask->dst.ip,
{ i->mask.dst.u.all & mask->dst.u.all },
i->mask.dst.protonum & mask->dst.protonum } };
= { { a->mask.src.ip & b->mask.src.ip,
{ a->mask.src.u.all & b->mask.src.u.all } },
{ a->mask.dst.ip & b->mask.dst.ip,
{ a->mask.dst.u.all & b->mask.dst.u.all },
a->mask.dst.protonum & b->mask.dst.protonum } };
return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}
inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
static inline int expect_matches(const struct ip_conntrack_expect *a,
const struct ip_conntrack_expect *b)
{
WRITE_LOCK(&ip_conntrack_lock);
unexpect_related(expect);
WRITE_UNLOCK(&ip_conntrack_lock);
return a->master == b->master
&& ip_ct_tuple_equal(&a->tuple, &b->tuple)
&& ip_ct_tuple_equal(&a->mask, &b->mask);
}
static void expectation_timed_out(unsigned long ul_expect)
/* Generally a bad idea to call this: could have matched already. */
void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
{
struct ip_conntrack_expect *expect = (void *) ul_expect;
struct ip_conntrack_expect *i;
DEBUGP("expectation %p timed out\n", expect);
WRITE_LOCK(&ip_conntrack_lock);
__unexpect_related(expect);
/* choose the oldest expectation to evict */
list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
if (expect_matches(i, exp)
&& (!i->timeout.function || del_timer(&i->timeout))) {
unlink_expect(i);
WRITE_UNLOCK(&ip_conntrack_lock);
destroy_expect(i);
return;
}
}
WRITE_UNLOCK(&ip_conntrack_lock);
}
struct ip_conntrack_expect *
ip_conntrack_expect_alloc(void)
struct ip_conntrack_expect *ip_conntrack_expect_alloc(void)
{
struct ip_conntrack_expect *new;
......@@ -852,135 +742,97 @@ ip_conntrack_expect_alloc(void)
DEBUGP("expect_related: OOM allocating expect\n");
return NULL;
}
new->master = NULL;
return new;
}
/* tuple_cmp compares whole union, we have to initialized cleanly */
memset(new, 0, sizeof(struct ip_conntrack_expect));
atomic_set(&new->use, 1);
void ip_conntrack_expect_free(struct ip_conntrack_expect *expect)
{
kmem_cache_free(ip_conntrack_expect_cachep, expect);
}
return new;
static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
{
atomic_inc(&exp->master->ct_general.use);
exp->master->expecting++;
list_add(&exp->list, &ip_conntrack_expect_list);
if (exp->master->helper->timeout) {
init_timer(&exp->timeout);
exp->timeout.data = (unsigned long)exp;
exp->timeout.function = expectation_timed_out;
exp->timeout.expires
= jiffies + exp->master->helper->timeout * HZ;
add_timer(&exp->timeout);
} else
exp->timeout.function = NULL;
CONNTRACK_STAT_INC(expect_create);
}
static void
ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
struct ip_conntrack *related_to)
/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct ip_conntrack *master)
{
DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
new->expectant = related_to;
new->sibling = NULL;
/* add to expected list for this connection */
list_add_tail(&new->expected_list, &related_to->sibling_list);
/* add to global list of expectations */
list_prepend(&ip_conntrack_expect_list, &new->list);
/* add and start timer if required */
if (related_to->helper->timeout) {
init_timer(&new->timeout);
new->timeout.data = (unsigned long)new;
new->timeout.function = expectation_timed_out;
new->timeout.expires = jiffies +
related_to->helper->timeout * HZ;
add_timer(&new->timeout);
struct ip_conntrack_expect *i;
list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
if (i->master == master) {
if (!i->timeout.function || del_timer(&i->timeout)) {
unlink_expect(i);
destroy_expect(i);
}
break;
}
}
related_to->expecting++;
}
/* Add a related connection. */
int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
struct ip_conntrack *related_to)
static inline int refresh_timer(struct ip_conntrack_expect *i)
{
struct ip_conntrack_expect *old;
int ret = 0;
if (!i->timeout.function)
return 1;
WRITE_LOCK(&ip_conntrack_lock);
/* Because of the write lock, no reader can walk the lists,
* so there is no need to use the tuple lock too */
if (!del_timer(&i->timeout))
return 0;
i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
add_timer(&i->timeout);
return 1;
}
int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
{
struct ip_conntrack_expect *i;
int ret;
DEBUGP("ip_conntrack_expect_related %p\n", related_to);
DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
struct ip_conntrack_expect *, &expect->tuple,
&expect->mask);
if (old && old->expectant == related_to) {
/* Helper private data may contain offsets but no pointers
pointing into the payload - otherwise we should have to copy
the data filled out by the helper over the old one */
DEBUGP("expect_related: resent packet\n");
if (related_to->helper->timeout) {
if (!del_timer(&old->timeout)) {
/* expectation is dying. Fall through */
WRITE_LOCK(&ip_conntrack_lock);
list_for_each_entry(i, &ip_conntrack_expect_list, list) {
if (expect_matches(i, expect)) {
/* Refresh timer: if it's dying, ignore.. */
if (refresh_timer(i)) {
ret = 0;
/* We don't need the one they've given us. */
ip_conntrack_expect_free(expect);
goto out;
} else {
old->timeout.expires = jiffies +
related_to->helper->timeout * HZ;
add_timer(&old->timeout);
}
} else if (expect_clash(i, expect)) {
ret = -EBUSY;
goto out;
}
WRITE_UNLOCK(&ip_conntrack_lock);
ip_conntrack_expect_put(expect);
return -EEXIST;
} else if (related_to->helper->max_expected &&
related_to->expecting >= related_to->helper->max_expected) {
/* old == NULL */
if (!(related_to->helper->flags &
IP_CT_HELPER_F_REUSE_EXPECT)) {
WRITE_UNLOCK(&ip_conntrack_lock);
if (net_ratelimit())
printk(KERN_WARNING
"ip_conntrack: max number of expected "
"connections %i of %s reached for "
"%u.%u.%u.%u->%u.%u.%u.%u\n",
related_to->helper->max_expected,
related_to->helper->name,
NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
ip_conntrack_expect_put(expect);
return -EPERM;
}
DEBUGP("ip_conntrack: max number of expected "
"connections %i of %s reached for "
"%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
related_to->helper->max_expected,
related_to->helper->name,
NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
/* choose the the oldest expectation to evict */
list_for_each_entry(old, &related_to->sibling_list,
expected_list)
if (old->sibling == NULL)
break;
/* We cannot fail since related_to->expecting is the number
* of unconfirmed expectations */
IP_NF_ASSERT(old && old->sibling == NULL);
/* newnat14 does not reuse the real allocated memory
* structures but rather unexpects the old and
* allocates a new. unexpect_related will decrement
* related_to->expecting.
*/
unexpect_related(old);
ret = -EPERM;
} else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
struct ip_conntrack_expect *, &expect->tuple,
&expect->mask)) {
WRITE_UNLOCK(&ip_conntrack_lock);
DEBUGP("expect_related: busy!\n");
ip_conntrack_expect_put(expect);
return -EBUSY;
}
out: ip_conntrack_expect_insert(expect, related_to);
/* Will be over limit? */
if (expect->master->helper->max_expected &&
expect->master->expecting >= expect->master->helper->max_expected)
evict_oldest_expect(expect->master);
ip_conntrack_expect_insert(expect);
ret = 0;
out:
WRITE_UNLOCK(&ip_conntrack_lock);
CONNTRACK_STAT_INC(expect_create);
return ret;
}
......@@ -997,7 +849,7 @@ void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
DUMP_TUPLE(newreply);
conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
if (!conntrack->master && list_empty(&conntrack->sibling_list))
if (!conntrack->master && conntrack->expecting == 0)
conntrack->helper = ip_ct_find_helper(newreply);
WRITE_UNLOCK(&ip_conntrack_lock);
}
......@@ -1014,23 +866,30 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
static inline int unhelp(struct ip_conntrack_tuple_hash *i,
const struct ip_conntrack_helper *me)
{
if (i->ctrack->helper == me) {
/* Get rid of any expected. */
remove_expectations(i->ctrack, 0);
/* And *then* set helper to NULL */
if (i->ctrack->helper == me)
i->ctrack->helper = NULL;
}
return 0;
}
void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
unsigned int i;
struct ip_conntrack_expect *exp, *tmp;
/* Need write lock here, to delete helper. */
WRITE_LOCK(&ip_conntrack_lock);
LIST_DELETE(&helpers, me);
/* Get rid of expectations */
list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
if (exp->master->helper == me) {
if (!exp->timeout.function
|| del_timer(&exp->timeout)) {
unlink_expect(exp);
destroy_expect(exp);
}
}
}
/* Get rid of expecteds, set helpers to NULL. */
LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
for (i = 0; i < ip_conntrack_htable_size; i++)
......
......@@ -40,7 +40,6 @@ static int loose;
module_param(loose, int, 0600);
unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
enum ip_ct_ftp_type type,
unsigned int matchoff,
......@@ -407,7 +406,7 @@ static int help(struct sk_buff **pskb,
networks, or the packet filter itself). */
if (!loose) {
ret = NF_ACCEPT;
ip_conntrack_expect_put(exp);
ip_conntrack_expect_free(exp);
goto out_update_nl;
}
exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
......@@ -423,17 +422,19 @@ static int help(struct sk_buff **pskb,
{ 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }});
exp->expectfn = NULL;
exp->master = ct;
/* Now, NAT might want to mangle the packet, and register the
* (possibly changed) expectation itself. */
if (ip_nat_ftp_hook)
ret = ip_nat_ftp_hook(pskb, ct, ctinfo, search[i].ftptype,
ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
matchoff, matchlen, exp, &seq);
else {
/* Can't expect this? Best to drop packet now. */
if (ip_conntrack_expect_related(exp, ct) != 0)
if (ip_conntrack_expect_related(exp) != 0) {
ip_conntrack_expect_free(exp);
ret = NF_DROP;
else
} else
ret = NF_ACCEPT;
}
......@@ -476,7 +477,6 @@ static int __init init(void)
ftp[i].mask.dst.protonum = 0xFFFF;
ftp[i].max_expected = 1;
ftp[i].timeout = 0;
ftp[i].flags = IP_CT_HELPER_F_REUSE_EXPECT;
ftp[i].me = ip_conntrack_ftp;
ftp[i].help = help;
......
......@@ -44,7 +44,6 @@ static char irc_buffer[65536];
static DECLARE_LOCK(irc_buffer_lock);
unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......@@ -220,13 +219,16 @@ static int help(struct sk_buff **pskb,
{ { 0, { 0 } },
{ 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }});
exp->expectfn = NULL;
exp->master = ct;
if (ip_nat_irc_hook)
ret = ip_nat_irc_hook(pskb, ct, ctinfo,
ret = ip_nat_irc_hook(pskb, ctinfo,
addr_beg_p - ib_ptr,
addr_end_p - addr_beg_p,
exp);
else if (ip_conntrack_expect_related(exp, ct) != 0)
else if (ip_conntrack_expect_related(exp) != 0) {
ip_conntrack_expect_free(exp);
ret = NF_DROP;
}
goto out;
} /* for .. NUM_DCCPROTO */
} /* while data < ... */
......
......@@ -200,7 +200,6 @@ static void *exp_seq_start(struct seq_file *s, loff_t *pos)
/* strange seq_file api calls stop even if we fail,
* thus we need to grab lock since stop unlocks */
READ_LOCK(&ip_conntrack_lock);
READ_LOCK(&ip_conntrack_expect_tuple_lock);
if (list_empty(e))
return NULL;
......@@ -227,7 +226,6 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void exp_seq_stop(struct seq_file *s, void *v)
{
READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
READ_UNLOCK(&ip_conntrack_lock);
}
......@@ -235,14 +233,13 @@ static int exp_seq_show(struct seq_file *s, void *v)
{
struct ip_conntrack_expect *expect = v;
if (expect->expectant->helper->timeout)
if (expect->timeout.function)
seq_printf(s, "%lu ", timer_pending(&expect->timeout)
? (expect->timeout.expires - jiffies)/HZ : 0);
else
seq_printf(s, "- ");
seq_printf(s, "use=%u proto=%u ", atomic_read(&expect->use),
expect->tuple.dst.protonum);
seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
print_tuple(s, &expect->tuple,
ip_ct_find_proto(expect->tuple.dst.protonum));
......@@ -910,14 +907,12 @@ EXPORT_SYMBOL(ip_ct_protos);
EXPORT_SYMBOL(ip_ct_find_proto);
EXPORT_SYMBOL(ip_ct_find_helper);
EXPORT_SYMBOL(ip_conntrack_expect_alloc);
EXPORT_SYMBOL(ip_conntrack_expect_free);
EXPORT_SYMBOL(ip_conntrack_expect_related);
EXPORT_SYMBOL(ip_conntrack_unexpect_related);
EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
EXPORT_SYMBOL_GPL(ip_conntrack_expect_put);
EXPORT_SYMBOL(ip_conntrack_tuple_taken);
EXPORT_SYMBOL(ip_ct_gather_frags);
EXPORT_SYMBOL(ip_conntrack_htable_size);
EXPORT_SYMBOL(ip_conntrack_expect_list);
EXPORT_SYMBOL(ip_conntrack_lock);
EXPORT_SYMBOL(ip_conntrack_hash);
EXPORT_SYMBOL(ip_conntrack_untracked);
......
......@@ -39,7 +39,6 @@ MODULE_PARM_DESC(ports, "port numbers of tftp servers");
#endif
unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
struct ip_conntrack_expect *exp);
EXPORT_SYMBOL_GPL(ip_nat_tftp_hook);
......@@ -76,14 +75,17 @@ static int tftp_help(struct sk_buff **pskb,
exp->mask.dst.u.udp.port = 0xffff;
exp->mask.dst.protonum = 0xffff;
exp->expectfn = NULL;
exp->master = ct;
DEBUGP("expect: ");
DUMP_TUPLE(&exp->tuple);
DUMP_TUPLE(&exp->mask);
if (ip_nat_tftp_hook)
ret = ip_nat_tftp_hook(pskb, ct, ctinfo, exp);
else if (ip_conntrack_expect_related(exp, ct) != 0)
ret = ip_nat_tftp_hook(pskb, ctinfo, exp);
else if (ip_conntrack_expect_related(exp) != 0) {
ip_conntrack_expect_free(exp);
ret = NF_DROP;
}
break;
case TFTP_OPCODE_DATA:
case TFTP_OPCODE_ACK:
......
......@@ -32,7 +32,6 @@ MODULE_DESCRIPTION("Amanda NAT helper");
MODULE_LICENSE("GPL");
static unsigned int help(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......@@ -52,22 +51,18 @@ static unsigned int help(struct sk_buff **pskb,
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
int err;
exp->tuple.dst.u.tcp.port = htons(port);
atomic_inc(&exp->use);
err = ip_conntrack_expect_related(exp, ct);
/* Success, or retransmit. */
if (!err || err == -EEXIST)
if (ip_conntrack_expect_related(exp) == 0)
break;
}
if (port == 0) {
ip_conntrack_expect_put(exp);
ip_conntrack_expect_free(exp);
return NF_DROP;
}
sprintf(buffer, "%u", port);
ret = ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
matchoff, matchlen,
buffer, strlen(buffer));
if (ret != NF_ACCEPT)
......
......@@ -113,7 +113,6 @@ static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t,
/* So, this packet has hit the connection tracking matching code.
Mangle it, and change the expectation to match the new version. */
static unsigned int ip_nat_ftp(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
enum ip_ct_ftp_type type,
unsigned int matchoff,
......@@ -124,6 +123,7 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb,
u_int32_t newip;
u_int16_t port;
int dir = CTINFO2DIR(ctinfo);
struct ip_conntrack *ct = exp->master;
DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
......@@ -138,17 +138,13 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb,
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
int err;
exp->tuple.dst.u.tcp.port = htons(port);
atomic_inc(&exp->use);
err = ip_conntrack_expect_related(exp, ct);
/* Success, or retransmit. */
if (!err || err == -EEXIST)
if (ip_conntrack_expect_related(exp) == 0)
break;
}
if (port == 0) {
ip_conntrack_expect_put(exp);
ip_conntrack_expect_free(exp);
return NF_DROP;
}
......
......@@ -438,12 +438,13 @@ static void ip_nat_copy_manip(struct ip_nat_info *master,
/* Setup NAT on this expected conntrack so it follows master. */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
void ip_nat_follow_master(struct ip_conntrack *ct)
void ip_nat_follow_master(struct ip_conntrack *ct,
struct ip_conntrack_expect *this)
{
struct ip_nat_info *master = &ct->master->expectant->nat.info;
struct ip_nat_info *master = &ct->master->nat.info;
/* This must be a fresh one. */
BUG_ON(ct->nat.info.initialized);
ip_nat_copy_manip(master, ct->master, ct);
ip_nat_copy_manip(master, this, ct);
}
......@@ -37,7 +37,6 @@ MODULE_LICENSE("GPL");
/* FIXME: Time out? --RR */
static unsigned int help(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......@@ -63,18 +62,13 @@ static unsigned int help(struct sk_buff **pskb,
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
int err;
exp->tuple.dst.u.tcp.port = htons(port);
atomic_inc(&exp->use);
err = ip_conntrack_expect_related(exp, ct);
/* Success, or retransmit. */
if (!err || err == -EEXIST)
if (ip_conntrack_expect_related(exp) == 0)
break;
}
if (port == 0) {
ip_conntrack_expect_put(exp);
ip_conntrack_expect_free(exp);
return NF_DROP;
}
......@@ -95,7 +89,7 @@ static unsigned int help(struct sk_buff **pskb,
DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n",
buffer, NIPQUAD(exp->tuple.src.ip), port);
ret = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
matchoff, matchlen, buffer,
strlen(buffer));
if (ret != NF_ACCEPT)
......
......@@ -39,15 +39,14 @@ MODULE_DESCRIPTION("tftp NAT helper");
MODULE_LICENSE("GPL");
static unsigned int help(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
struct ip_conntrack_expect *exp)
{
exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port;
exp->dir = IP_CT_DIR_REPLY;
exp->expectfn = ip_nat_follow_master;
if (ip_conntrack_expect_related(exp, ct) != 0) {
ip_conntrack_expect_put(exp);
if (ip_conntrack_expect_related(exp) != 0) {
ip_conntrack_expect_free(exp);
return NF_DROP;
}
return NF_ACCEPT;
......
......@@ -38,7 +38,6 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct ipt_helper_info *info = matchinfo;
struct ip_conntrack_expect *exp;
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
int ret = info->invert;
......@@ -54,28 +53,21 @@ match(const struct sk_buff *skb,
return ret;
}
exp = ct->master;
READ_LOCK(&ip_conntrack_lock);
if (!exp->expectant) {
DEBUGP("ipt_helper: expectation %p without expectant !?!\n",
exp);
goto out_unlock;
}
if (!exp->expectant->helper) {
if (!ct->master->helper) {
DEBUGP("ipt_helper: master ct %p has no helper\n",
exp->expectant);
goto out_unlock;
}
DEBUGP("master's name = %s , info->name = %s\n",
exp->expectant->helper->name, info->name);
ct->master->helper->name, info->name);
if (info->name[0] == '\0')
ret ^= 1;
else
ret ^= !strncmp(exp->expectant->helper->name, info->name,
strlen(exp->expectant->helper->name));
ret ^= !strncmp(ct->master->helper->name, info->name,
strlen(ct->master->helper->name));
out_unlock:
READ_UNLOCK(&ip_conntrack_lock);
return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment