Commit ea781f19 authored by Eric Dumazet, committed by Patrick McHardy

netfilter: nf_conntrack: use SLAB_DESTROY_BY_RCU and get rid of call_rcu()

Use "hlist_nulls" infrastructure we added in 2.6.29 for RCUification of UDP & TCP.

This permits an easy conversion from call_rcu() based hash lists to a
SLAB_DESTROY_BY_RCU one.

Avoiding call_rcu() delay at nf_conn freeing time has numerous gains.

First, it doesn't fill RCU queues (up to 10000 elements per cpu).
This reduces the possibility of OOM if queued elements are not taken into account.
This also reduces latency problems when the RCU queue size hits hilimit and
triggers emergency mode.

- It allows fast reuse of just freed elements, permitting better use of
CPU cache.

- We delete rcu_head from "struct nf_conn", shrinking size of this structure
by 8 or 16 bytes.

This patch only takes care of "struct nf_conn".
call_rcu() is still used for less critical conntrack parts, that may
be converted later if necessary.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
parent 1f9352ae
...@@ -91,8 +91,7 @@ struct nf_conn_help { ...@@ -91,8 +91,7 @@ struct nf_conn_help {
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
struct nf_conn struct nf_conn {
{
/* Usage count in here is 1 for hash table/destruct timer, 1 per skb, /* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
plus 1 for any connection(s) we are `master' for */ plus 1 for any connection(s) we are `master' for */
struct nf_conntrack ct_general; struct nf_conntrack ct_general;
...@@ -126,7 +125,6 @@ struct nf_conn ...@@ -126,7 +125,6 @@ struct nf_conn
#ifdef CONFIG_NET_NS #ifdef CONFIG_NET_NS
struct net *ct_net; struct net *ct_net;
#endif #endif
struct rcu_head rcu;
}; };
static inline struct nf_conn * static inline struct nf_conn *
...@@ -190,9 +188,13 @@ static inline void nf_ct_put(struct nf_conn *ct) ...@@ -190,9 +188,13 @@ static inline void nf_ct_put(struct nf_conn *ct)
extern int nf_ct_l3proto_try_module_get(unsigned short l3proto); extern int nf_ct_l3proto_try_module_get(unsigned short l3proto);
extern void nf_ct_l3proto_module_put(unsigned short l3proto); extern void nf_ct_l3proto_module_put(unsigned short l3proto);
extern struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced); /*
extern void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, * Allocate a hashtable of hlist_head (if nulls == 0),
unsigned int size); * or hlist_nulls_head (if nulls == 1)
*/
extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls);
extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size);
extern struct nf_conntrack_tuple_hash * extern struct nf_conntrack_tuple_hash *
__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple); __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/netfilter/x_tables.h> #include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h> #include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <linux/list_nulls.h>
/* A `tuple' is a structure containing the information to uniquely /* A `tuple' is a structure containing the information to uniquely
identify a connection. ie. if two packets have the same tuple, they identify a connection. ie. if two packets have the same tuple, they
...@@ -146,9 +147,8 @@ static inline void nf_ct_dump_tuple(const struct nf_conntrack_tuple *t) ...@@ -146,9 +147,8 @@ static inline void nf_ct_dump_tuple(const struct nf_conntrack_tuple *t)
((enum ip_conntrack_dir)(h)->tuple.dst.dir) ((enum ip_conntrack_dir)(h)->tuple.dst.dir)
/* Connections have two entries in the hash table: one for each way */ /* Connections have two entries in the hash table: one for each way */
struct nf_conntrack_tuple_hash struct nf_conntrack_tuple_hash {
{ struct hlist_nulls_node hnnode;
struct hlist_node hnode;
struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple tuple;
}; };
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#define __NETNS_CONNTRACK_H #define __NETNS_CONNTRACK_H
#include <linux/list.h> #include <linux/list.h>
#include <linux/list_nulls.h>
#include <asm/atomic.h> #include <asm/atomic.h>
struct ctl_table_header; struct ctl_table_header;
...@@ -10,9 +11,9 @@ struct nf_conntrack_ecache; ...@@ -10,9 +11,9 @@ struct nf_conntrack_ecache;
struct netns_ct { struct netns_ct {
atomic_t count; atomic_t count;
unsigned int expect_count; unsigned int expect_count;
struct hlist_head *hash; struct hlist_nulls_head *hash;
struct hlist_head *expect_hash; struct hlist_head *expect_hash;
struct hlist_head unconfirmed; struct hlist_nulls_head unconfirmed;
struct ip_conntrack_stat *stat; struct ip_conntrack_stat *stat;
#ifdef CONFIG_NF_CONNTRACK_EVENTS #ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache; struct nf_conntrack_ecache *ecache;
......
...@@ -25,40 +25,42 @@ struct ct_iter_state { ...@@ -25,40 +25,42 @@ struct ct_iter_state {
unsigned int bucket; unsigned int bucket;
}; };
static struct hlist_node *ct_get_first(struct seq_file *seq) static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
{ {
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private; struct ct_iter_state *st = seq->private;
struct hlist_node *n; struct hlist_nulls_node *n;
for (st->bucket = 0; for (st->bucket = 0;
st->bucket < nf_conntrack_htable_size; st->bucket < nf_conntrack_htable_size;
st->bucket++) { st->bucket++) {
n = rcu_dereference(net->ct.hash[st->bucket].first); n = rcu_dereference(net->ct.hash[st->bucket].first);
if (n) if (!is_a_nulls(n))
return n; return n;
} }
return NULL; return NULL;
} }
static struct hlist_node *ct_get_next(struct seq_file *seq, static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
struct hlist_node *head) struct hlist_nulls_node *head)
{ {
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private; struct ct_iter_state *st = seq->private;
head = rcu_dereference(head->next); head = rcu_dereference(head->next);
while (head == NULL) { while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
if (++st->bucket >= nf_conntrack_htable_size) if (++st->bucket >= nf_conntrack_htable_size)
return NULL; return NULL;
}
head = rcu_dereference(net->ct.hash[st->bucket].first); head = rcu_dereference(net->ct.hash[st->bucket].first);
} }
return head; return head;
} }
static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
{ {
struct hlist_node *head = ct_get_first(seq); struct hlist_nulls_node *head = ct_get_first(seq);
if (head) if (head)
while (pos && (head = ct_get_next(seq, head))) while (pos && (head = ct_get_next(seq, head)))
...@@ -87,69 +89,76 @@ static void ct_seq_stop(struct seq_file *s, void *v) ...@@ -87,69 +89,76 @@ static void ct_seq_stop(struct seq_file *s, void *v)
static int ct_seq_show(struct seq_file *s, void *v) static int ct_seq_show(struct seq_file *s, void *v)
{ {
const struct nf_conntrack_tuple_hash *hash = v; struct nf_conntrack_tuple_hash *hash = v;
const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
const struct nf_conntrack_l3proto *l3proto; const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto; const struct nf_conntrack_l4proto *l4proto;
int ret = 0;
NF_CT_ASSERT(ct); NF_CT_ASSERT(ct);
if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
return 0;
/* we only want to print DIR_ORIGINAL */ /* we only want to print DIR_ORIGINAL */
if (NF_CT_DIRECTION(hash)) if (NF_CT_DIRECTION(hash))
return 0; goto release;
if (nf_ct_l3num(ct) != AF_INET) if (nf_ct_l3num(ct) != AF_INET)
return 0; goto release;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
NF_CT_ASSERT(l3proto); NF_CT_ASSERT(l3proto);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
NF_CT_ASSERT(l4proto); NF_CT_ASSERT(l4proto);
ret = -ENOSPC;
if (seq_printf(s, "%-8s %u %ld ", if (seq_printf(s, "%-8s %u %ld ",
l4proto->name, nf_ct_protonum(ct), l4proto->name, nf_ct_protonum(ct),
timer_pending(&ct->timeout) timer_pending(&ct->timeout)
? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
return -ENOSPC; goto release;
if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
return -ENOSPC; goto release;
if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
l3proto, l4proto)) l3proto, l4proto))
return -ENOSPC; goto release;
if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
return -ENOSPC; goto release;
if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
if (seq_printf(s, "[UNREPLIED] ")) if (seq_printf(s, "[UNREPLIED] "))
return -ENOSPC; goto release;
if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
l3proto, l4proto)) l3proto, l4proto))
return -ENOSPC; goto release;
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
return -ENOSPC; goto release;
if (test_bit(IPS_ASSURED_BIT, &ct->status)) if (test_bit(IPS_ASSURED_BIT, &ct->status))
if (seq_printf(s, "[ASSURED] ")) if (seq_printf(s, "[ASSURED] "))
return -ENOSPC; goto release;
#ifdef CONFIG_NF_CONNTRACK_MARK #ifdef CONFIG_NF_CONNTRACK_MARK
if (seq_printf(s, "mark=%u ", ct->mark)) if (seq_printf(s, "mark=%u ", ct->mark))
return -ENOSPC; goto release;
#endif #endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK #ifdef CONFIG_NF_CONNTRACK_SECMARK
if (seq_printf(s, "secmark=%u ", ct->secmark)) if (seq_printf(s, "secmark=%u ", ct->secmark))
return -ENOSPC; goto release;
#endif #endif
if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
return -ENOSPC; goto release;
ret = 0;
return 0; release:
nf_ct_put(ct);
return ret;
} }
static const struct seq_operations ct_seq_ops = { static const struct seq_operations ct_seq_ops = {
......
...@@ -679,7 +679,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, ...@@ -679,7 +679,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
static int __net_init nf_nat_net_init(struct net *net) static int __net_init nf_nat_net_init(struct net *net)
{ {
net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
&net->ipv4.nat_vmalloced); &net->ipv4.nat_vmalloced, 0);
if (!net->ipv4.nat_bysource) if (!net->ipv4.nat_bysource)
return -ENOMEM; return -ENOMEM;
return 0; return 0;
......
This diff is collapsed.
...@@ -604,7 +604,7 @@ int nf_conntrack_expect_init(struct net *net) ...@@ -604,7 +604,7 @@ int nf_conntrack_expect_init(struct net *net)
net->ct.expect_count = 0; net->ct.expect_count = 0;
net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
&net->ct.expect_vmalloc); &net->ct.expect_vmalloc, 0);
if (net->ct.expect_hash == NULL) if (net->ct.expect_hash == NULL)
goto err1; goto err1;
......
...@@ -159,6 +159,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, ...@@ -159,6 +159,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_expect *exp; struct nf_conntrack_expect *exp;
const struct hlist_node *n, *next; const struct hlist_node *n, *next;
const struct hlist_nulls_node *nn;
unsigned int i; unsigned int i;
/* Get rid of expectations */ /* Get rid of expectations */
...@@ -175,10 +176,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, ...@@ -175,10 +176,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
} }
/* Get rid of expecteds, set helpers to NULL. */ /* Get rid of expecteds, set helpers to NULL. */
hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) hlist_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
unhelp(h, me); unhelp(h, me);
for (i = 0; i < nf_conntrack_htable_size; i++) { for (i = 0; i < nf_conntrack_htable_size; i++) {
hlist_for_each_entry(h, n, &net->ct.hash[i], hnode) hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
unhelp(h, me); unhelp(h, me);
} }
} }
...@@ -218,7 +219,7 @@ int nf_conntrack_helper_init(void) ...@@ -218,7 +219,7 @@ int nf_conntrack_helper_init(void)
nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
&nf_ct_helper_vmalloc); &nf_ct_helper_vmalloc, 0);
if (!nf_ct_helper_hash) if (!nf_ct_helper_hash)
return -ENOMEM; return -ENOMEM;
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/rculist.h> #include <linux/rculist.h>
#include <linux/rculist_nulls.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/timer.h> #include <linux/timer.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
...@@ -536,7 +537,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -536,7 +537,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{ {
struct nf_conn *ct, *last; struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple_hash *h;
struct hlist_node *n; struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family; u_int8_t l3proto = nfmsg->nfgen_family;
...@@ -544,27 +545,27 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -544,27 +545,27 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
last = (struct nf_conn *)cb->args[1]; last = (struct nf_conn *)cb->args[1];
for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
restart: restart:
hlist_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
hnode) { hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue; continue;
ct = nf_ct_tuplehash_to_ctrack(h); ct = nf_ct_tuplehash_to_ctrack(h);
if (!atomic_inc_not_zero(&ct->ct_general.use))
continue;
/* Dump entries of a given L3 protocol number. /* Dump entries of a given L3 protocol number.
* If it is not specified, ie. l3proto == 0, * If it is not specified, ie. l3proto == 0,
* then dump everything. */ * then dump everything. */
if (l3proto && nf_ct_l3num(ct) != l3proto) if (l3proto && nf_ct_l3num(ct) != l3proto)
continue; goto releasect;
if (cb->args[1]) { if (cb->args[1]) {
if (ct != last) if (ct != last)
continue; goto releasect;
cb->args[1] = 0; cb->args[1] = 0;
} }
if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, cb->nlh->nlmsg_seq,
IPCTNL_MSG_CT_NEW, IPCTNL_MSG_CT_NEW,
1, ct) < 0) { 1, ct) < 0) {
if (!atomic_inc_not_zero(&ct->ct_general.use))
continue;
cb->args[1] = (unsigned long)ct; cb->args[1] = (unsigned long)ct;
goto out; goto out;
} }
...@@ -577,6 +578,8 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -577,6 +578,8 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
if (acct) if (acct)
memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]));
} }
releasect:
nf_ct_put(ct);
} }
if (cb->args[1]) { if (cb->args[1]) {
cb->args[1] = 0; cb->args[1] = 0;
...@@ -1242,13 +1245,12 @@ ctnetlink_create_conntrack(struct nlattr *cda[], ...@@ -1242,13 +1245,12 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
if (err < 0) if (err < 0)
goto err2; goto err2;
master_h = __nf_conntrack_find(&init_net, &master); master_h = nf_conntrack_find_get(&init_net, &master);
if (master_h == NULL) { if (master_h == NULL) {
err = -ENOENT; err = -ENOENT;
goto err2; goto err2;
} }
master_ct = nf_ct_tuplehash_to_ctrack(master_h); master_ct = nf_ct_tuplehash_to_ctrack(master_h);
nf_conntrack_get(&master_ct->ct_general);
__set_bit(IPS_EXPECTED_BIT, &ct->status); __set_bit(IPS_EXPECTED_BIT, &ct->status);
ct->master = master_ct; ct->master = master_ct;
} }
......
...@@ -44,40 +44,42 @@ struct ct_iter_state { ...@@ -44,40 +44,42 @@ struct ct_iter_state {
unsigned int bucket; unsigned int bucket;
}; };
static struct hlist_node *ct_get_first(struct seq_file *seq) static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
{ {
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private; struct ct_iter_state *st = seq->private;
struct hlist_node *n; struct hlist_nulls_node *n;
for (st->bucket = 0; for (st->bucket = 0;
st->bucket < nf_conntrack_htable_size; st->bucket < nf_conntrack_htable_size;
st->bucket++) { st->bucket++) {
n = rcu_dereference(net->ct.hash[st->bucket].first); n = rcu_dereference(net->ct.hash[st->bucket].first);
if (n) if (!is_a_nulls(n))
return n; return n;
} }
return NULL; return NULL;
} }
static struct hlist_node *ct_get_next(struct seq_file *seq, static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
struct hlist_node *head) struct hlist_nulls_node *head)
{ {
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private; struct ct_iter_state *st = seq->private;
head = rcu_dereference(head->next); head = rcu_dereference(head->next);
while (head == NULL) { while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
if (++st->bucket >= nf_conntrack_htable_size) if (++st->bucket >= nf_conntrack_htable_size)
return NULL; return NULL;
}
head = rcu_dereference(net->ct.hash[st->bucket].first); head = rcu_dereference(net->ct.hash[st->bucket].first);
} }
return head; return head;
} }
static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
{ {
struct hlist_node *head = ct_get_first(seq); struct hlist_nulls_node *head = ct_get_first(seq);
if (head) if (head)
while (pos && (head = ct_get_next(seq, head))) while (pos && (head = ct_get_next(seq, head)))
...@@ -107,67 +109,74 @@ static void ct_seq_stop(struct seq_file *s, void *v) ...@@ -107,67 +109,74 @@ static void ct_seq_stop(struct seq_file *s, void *v)
/* return 0 on success, 1 in case of error */ /* return 0 on success, 1 in case of error */
static int ct_seq_show(struct seq_file *s, void *v) static int ct_seq_show(struct seq_file *s, void *v)
{ {
const struct nf_conntrack_tuple_hash *hash = v; struct nf_conntrack_tuple_hash *hash = v;
const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
const struct nf_conntrack_l3proto *l3proto; const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto; const struct nf_conntrack_l4proto *l4proto;
int ret = 0;
NF_CT_ASSERT(ct); NF_CT_ASSERT(ct);
if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
return 0;
/* we only want to print DIR_ORIGINAL */ /* we only want to print DIR_ORIGINAL */
if (NF_CT_DIRECTION(hash)) if (NF_CT_DIRECTION(hash))
return 0; goto release;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
NF_CT_ASSERT(l3proto); NF_CT_ASSERT(l3proto);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
NF_CT_ASSERT(l4proto); NF_CT_ASSERT(l4proto);
ret = -ENOSPC;
if (seq_printf(s, "%-8s %u %-8s %u %ld ", if (seq_printf(s, "%-8s %u %-8s %u %ld ",
l3proto->name, nf_ct_l3num(ct), l3proto->name, nf_ct_l3num(ct),
l4proto->name, nf_ct_protonum(ct), l4proto->name, nf_ct_protonum(ct),
timer_pending(&ct->timeout) timer_pending(&ct->timeout)
? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
return -ENOSPC; goto release;
if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
return -ENOSPC; goto release;
if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
l3proto, l4proto)) l3proto, l4proto))
return -ENOSPC; goto release;
if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
return -ENOSPC; goto release;
if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
if (seq_printf(s, "[UNREPLIED] ")) if (seq_printf(s, "[UNREPLIED] "))
return -ENOSPC; goto release;
if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
l3proto, l4proto)) l3proto, l4proto))
return -ENOSPC; goto release;
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
return -ENOSPC; goto release;
if (test_bit(IPS_ASSURED_BIT, &ct->status)) if (test_bit(IPS_ASSURED_BIT, &ct->status))
if (seq_printf(s, "[ASSURED] ")) if (seq_printf(s, "[ASSURED] "))
return -ENOSPC; goto release;
#if defined(CONFIG_NF_CONNTRACK_MARK) #if defined(CONFIG_NF_CONNTRACK_MARK)
if (seq_printf(s, "mark=%u ", ct->mark)) if (seq_printf(s, "mark=%u ", ct->mark))
return -ENOSPC; goto release;
#endif #endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK #ifdef CONFIG_NF_CONNTRACK_SECMARK
if (seq_printf(s, "secmark=%u ", ct->secmark)) if (seq_printf(s, "secmark=%u ", ct->secmark))
return -ENOSPC; goto release;
#endif #endif
if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
return -ENOSPC; goto release;
ret = 0;
release:
nf_ct_put(ct);
return 0; return 0;
} }
......
...@@ -108,7 +108,7 @@ static int count_them(struct xt_connlimit_data *data, ...@@ -108,7 +108,7 @@ static int count_them(struct xt_connlimit_data *data,
const struct nf_conntrack_tuple_hash *found; const struct nf_conntrack_tuple_hash *found;
struct xt_connlimit_conn *conn; struct xt_connlimit_conn *conn;
struct xt_connlimit_conn *tmp; struct xt_connlimit_conn *tmp;
const struct nf_conn *found_ct; struct nf_conn *found_ct;
struct list_head *hash; struct list_head *hash;
bool addit = true; bool addit = true;
int matches = 0; int matches = 0;
...@@ -123,7 +123,7 @@ static int count_them(struct xt_connlimit_data *data, ...@@ -123,7 +123,7 @@ static int count_them(struct xt_connlimit_data *data,
/* check the saved connections */ /* check the saved connections */
list_for_each_entry_safe(conn, tmp, hash, list) { list_for_each_entry_safe(conn, tmp, hash, list) {
found = __nf_conntrack_find(&init_net, &conn->tuple); found = nf_conntrack_find_get(&init_net, &conn->tuple);
found_ct = NULL; found_ct = NULL;
if (found != NULL) if (found != NULL)
...@@ -151,6 +151,7 @@ static int count_them(struct xt_connlimit_data *data, ...@@ -151,6 +151,7 @@ static int count_them(struct xt_connlimit_data *data,
* we do not care about connections which are * we do not care about connections which are
* closed already -> ditch it * closed already -> ditch it
*/ */
nf_ct_put(found_ct);
list_del(&conn->list); list_del(&conn->list);
kfree(conn); kfree(conn);
continue; continue;
...@@ -160,6 +161,7 @@ static int count_them(struct xt_connlimit_data *data, ...@@ -160,6 +161,7 @@ static int count_them(struct xt_connlimit_data *data,
match->family)) match->family))
/* same source network -> be counted! */ /* same source network -> be counted! */
++matches; ++matches;
nf_ct_put(found_ct);
} }
rcu_read_unlock(); rcu_read_unlock();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment