Commit e8ad1a8f authored by David S. Miller

Merge branch 'master' of git://1984.lsi.us.es/nf-next

Pablo Neira Ayuso says:

====================
* Remove the limitation on the maximum number of supported sets in
  ipset. Now ipset automagically grows the array of sets by 64 spare
  slots whenever it runs out of space, from Jozsef Kadlecsik.

* Partially remove the generic queue infrastructure now that ip_queue
  is gone; nfnetlink_queue is now its only client, from Florian
  Westphal.

* Add missing attribute policy checks in ctnetlink, from Florian
  Westphal.

* Automagically kill conntrack entries that use the wrong output
  interface for masquerading after a routing change, from Jozsef
  Kadlecsik.

* Two patches to improve ct object traceability. Now ct objects are
  always placed on one of the existing lists, which allows us to dump
  the contents of the unconfirmed and dying conntrack lists via
  ctnetlink, as a way to provide more instrumentation in case you
  suspect leaks, from myself.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 099f7aa7 a0ecb85a
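Note on the ipset change: the resize patch itself is not among the hunks shown below. The idea is simply that the fixed-size array holding all sets is reallocated with 64 extra spare slots whenever it fills up, instead of hitting a hard limit. A minimal userspace sketch of that grow-by-64 scheme, with hypothetical names since the ipset code is not part of this diff (the real kernel code swaps the array under the nfnl mutex and frees the old one only after readers are done):

#include <stdlib.h>
#include <string.h>

#define IPSET_GROW_SLOTS 64            /* grow in chunks of 64 slots */

struct ip_set;                         /* opaque for this sketch */

static struct ip_set **set_list;       /* array of registered sets */
static size_t max_sets;                /* current slot count */

/* Called when every slot is occupied; returns 0 on success. */
static int set_list_grow(void)
{
	size_t new_max = max_sets + IPSET_GROW_SLOTS;
	struct ip_set **tmp = calloc(new_max, sizeof(*tmp));

	if (!tmp)
		return -1;
	if (set_list) {
		memcpy(tmp, set_list, max_sets * sizeof(*tmp));
		free(set_list);
	}
	set_list = tmp;
	max_sets = new_max;
	return 0;
}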
@@ -182,7 +182,7 @@ __nf_conntrack_find(struct net *net, u16 zone,
extern int nf_conntrack_hash_check_insert(struct nf_conn *ct);
extern void nf_ct_delete_from_lists(struct nf_conn *ct);
extern void nf_ct_insert_dying_list(struct nf_conn *ct);
extern void nf_ct_dying_timeout(struct nf_conn *ct);
extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report);
@@ -68,4 +68,19 @@ static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
#endif
}
static inline bool nf_nat_oif_changed(unsigned int hooknum,
enum ip_conntrack_info ctinfo,
struct nf_conn_nat *nat,
const struct net_device *out)
{
#if IS_ENABLED(CONFIG_IP_NF_TARGET_MASQUERADE) || \
IS_ENABLED(CONFIG_IP6_NF_TARGET_MASQUERADE)
return nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL &&
nat->masq_index != out->ifindex;
#else
return false;
#endif
}
#endif
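For context (not part of this hunk): masq_index is recorded by the MASQUERADE target when the SNAT mapping is first set up, which is what makes the comparison against out->ifindex above meaningful. A hedged sketch of that target side, simplified from the MASQUERADE target:

/* Sketch of how nat->masq_index gets populated at SNAT-setup time
 * (based on the MASQUERADE target; simplified, not part of this diff). */
static unsigned int
masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
	struct nf_conn_nat *nat = nfct_nat(ct);

	/* Tie the mapping to the current output interface so that
	 * nf_nat_oif_changed() can spot a later rerouting event. */
	nat->masq_index = par->out->ifindex;

	/* ... select the source address of 'out' and call
	 * nf_nat_setup_info() to install the SNAT binding ... */
	return NF_ACCEPT;	/* simplified; the real target returns
				 * the verdict from nf_nat_setup_info() */
}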
@@ -21,14 +21,10 @@ struct nf_queue_entry {
struct nf_queue_handler {
int (*outfn)(struct nf_queue_entry *entry,
unsigned int queuenum);
char *name;
};
extern int nf_register_queue_handler(u_int8_t pf,
const struct nf_queue_handler *qh);
extern int nf_unregister_queue_handler(u_int8_t pf,
const struct nf_queue_handler *qh);
extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
void nf_register_queue_handler(const struct nf_queue_handler *qh);
void nf_unregister_queue_handler(void);
extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
#endif /* _NF_QUEUE_H */
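With ip_queue gone, nfnetlink_queue is the only queueing backend left, so the per-protocol-family slot array (and all of its error cases) collapses into a single global pointer. The caller-side shift, recapped from the hunks further down in this diff:

/* Before: one handler slot per protocol family, bound on demand from
 * userspace via NFQNL_CFG_CMD_PF_BIND, with failure cases to handle. */
err = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
if (err < 0)
	return err;

/* After: a single global handler, registered once at module init;
 * registration can no longer fail. */
nf_register_queue_handler(&nfqh);
/* ... */
nf_unregister_queue_handler();	/* at module exit */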
@@ -9,6 +9,8 @@ enum cntl_msg_types {
IPCTNL_MSG_CT_GET_CTRZERO,
IPCTNL_MSG_CT_GET_STATS_CPU,
IPCTNL_MSG_CT_GET_STATS,
IPCTNL_MSG_CT_GET_DYING,
IPCTNL_MSG_CT_GET_UNCONFIRMED,
IPCTNL_MSG_MAX
};
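These two commands make the formerly invisible lists dumpable from userspace. A minimal libmnl request for the dying list might look like this (a sketch, assuming libmnl and the usual nfnetlink headers; receiving and decoding the dump replies is omitted):

#include <stdlib.h>
#include <time.h>
#include <sys/socket.h>
#include <libmnl/libmnl.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>

int main(void)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct mnl_socket *nl;
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfh;

	nl = mnl_socket_open(NETLINK_NETFILTER);
	if (!nl || mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0)
		exit(EXIT_FAILURE);

	nlh = mnl_nlmsg_put_header(buf);
	/* subsystem CTNETLINK, new GET_DYING command, dump request */
	nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET_DYING;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	nlh->nlmsg_seq = time(NULL);

	nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfh));
	nfh->nfgen_family = AF_UNSPEC;	/* l3proto filter; unspec = all */
	nfh->version = NFNETLINK_V0;
	nfh->res_id = 0;

	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
		exit(EXIT_FAILURE);

	/* ... mnl_socket_recvfrom() + mnl_cb_run() to walk the dump ... */
	mnl_socket_close(nl);
	return 0;
}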
@@ -134,6 +134,10 @@ nf_nat_ipv4_fn(unsigned int hooknum,
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_DROP;
}
}
return nf_nat_packet(ct, ctinfo, hooknum, skb);
@@ -137,6 +137,10 @@ nf_nat_ipv6_fn(unsigned int hooknum,
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_DROP;
}
}
return nf_nat_packet(ct, ctinfo, hooknum, skb);
@@ -295,8 +295,6 @@ void __init netfilter_init(void)
panic("cannot create netfilter proc entry");
#endif
if (netfilter_queue_init() < 0)
panic("cannot initialize nf_queue");
if (netfilter_log_init() < 0)
panic("cannot initialize nf_log");
}
@@ -221,11 +221,9 @@ destroy_conntrack(struct nf_conntrack *nfct)
* too. */
nf_ct_remove_expectations(ct);
/* We overload first tuple to link into unconfirmed list. */
if (!nf_ct_is_confirmed(ct)) {
/* We overload first tuple to link into unconfirmed or dying list.*/
BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
}
NF_CT_STAT_INC(net, delete);
spin_unlock_bh(&nf_conntrack_lock);
@@ -247,6 +245,9 @@ void nf_ct_delete_from_lists(struct nf_conn *ct)
* Otherwise we can get spurious warnings. */
NF_CT_STAT_INC(net, delete_list);
clean_from_lists(ct);
/* add this conntrack to the dying list */
hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
&net->ct.dying);
spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
@@ -268,31 +269,23 @@ static void death_by_event(unsigned long ul_conntrack)
}
/* we've got the event delivered, now it's dying */
set_bit(IPS_DYING_BIT, &ct->status);
spin_lock(&nf_conntrack_lock);
hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
spin_unlock(&nf_conntrack_lock);
nf_ct_put(ct);
}
void nf_ct_insert_dying_list(struct nf_conn *ct)
void nf_ct_dying_timeout(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
BUG_ON(ecache == NULL);
/* add this conntrack to the dying list */
spin_lock_bh(&nf_conntrack_lock);
hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
&net->ct.dying);
spin_unlock_bh(&nf_conntrack_lock);
/* set a new timer to retry event delivery */
setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
ecache->timeout.expires = jiffies +
(random32() % net->ct.sysctl_events_retry_timeout);
add_timer(&ecache->timeout);
}
EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
static void death_by_timeout(unsigned long ul_conntrack)
{
@@ -307,7 +300,7 @@ static void death_by_timeout(unsigned long ul_conntrack)
unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
/* destroy event was not delivered */
nf_ct_delete_from_lists(ct);
nf_ct_insert_dying_list(ct);
nf_ct_dying_timeout(ct);
return;
}
set_bit(IPS_DYING_BIT, &ct->status);
@@ -898,7 +898,8 @@ ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone)
}
static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
[CTA_HELP_NAME] = { .type = NLA_NUL_STRING },
[CTA_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
};
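NLA_NUL_STRING alone only guarantees NUL termination; without a .len bound, userspace could hand over an arbitrarily long helper name to code that expects it to fit the fixed NF_CT_HELPER_NAME_LEN buffer of struct nf_conntrack_helper. With the bound, validation now fails up front. Schematically (an annotated recap, not new code from the diff):

struct nlattr *tb[CTA_HELP_MAX + 1];
int err;

err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
if (err < 0)
	return err;	/* now -EINVAL when CTA_HELP_NAME is longer
			 * than NF_CT_HELPER_NAME_LEN - 1 characters */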
static inline int
@@ -932,6 +933,8 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_ID] = { .type = NLA_U32 },
[CTA_NAT_DST] = { .type = NLA_NESTED },
[CTA_TUPLE_MASTER] = { .type = NLA_NESTED },
[CTA_NAT_SEQ_ADJ_ORIG] = { .type = NLA_NESTED },
[CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED },
[CTA_ZONE] = { .type = NLA_U16 },
[CTA_MARK_MASK] = { .type = NLA_U32 },
};
@@ -989,7 +992,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
nlmsg_report(nlh)) < 0) {
nf_ct_delete_from_lists(ct);
/* we failed to report the event, try later */
nf_ct_insert_dying_list(ct);
nf_ct_dying_timeout(ct);
nf_ct_put(ct);
return 0;
}
@@ -1089,6 +1092,112 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
return err == -EAGAIN ? -ENOBUFS : err;
}
static int ctnetlink_done_list(struct netlink_callback *cb)
{
if (cb->args[1])
nf_ct_put((struct nf_conn *)cb->args[1]);
return 0;
}
static int
ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb,
struct hlist_nulls_head *list)
{
struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
int res;
if (cb->args[2])
return 0;
spin_lock_bh(&nf_conntrack_lock);
last = (struct nf_conn *)cb->args[1];
restart:
hlist_nulls_for_each_entry(h, n, list, hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (l3proto && nf_ct_l3num(ct) != l3proto)
continue;
if (cb->args[1]) {
if (ct != last)
continue;
cb->args[1] = 0;
}
rcu_read_lock();
res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
ct);
rcu_read_unlock();
if (res < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
goto out;
}
}
if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
} else
cb->args[2] = 1;
out:
spin_unlock_bh(&nf_conntrack_lock);
if (last)
nf_ct_put(last);
return skb->len;
}
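The cb->args[] slots implement the usual netlink dump resume protocol: a dump can span several skbs and nf_conntrack_lock is dropped in between, so args[1] keeps a reference-counted cursor (the last conntrack that did not fit, pinned with nf_conntrack_get() so it cannot be freed meanwhile) and args[2] flags completion. A stripped-down userspace analog of the same chunked-iteration pattern, minus the locking and refcounting:

#include <stdio.h>

struct item { int id; struct item *next; };

/* Emit items in fixed-size chunks; return the cursor to resume from,
 * or NULL when the whole list has been dumped. */
static struct item *dump_chunk(struct item *head, struct item *resume,
			       int budget)
{
	struct item *it = resume ? resume : head;

	for (; it; it = it->next) {
		if (budget-- == 0)
			return it;	/* "skb full": remember position */
		printf("item %d\n", it->id);
	}
	return NULL;			/* done: caller sets its args[2] */
}

int main(void)
{
	struct item c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct item *cur = NULL;

	do
		cur = dump_chunk(&a, cur, 2);	/* two items per "skb" */
	while (cur);
	return 0;
}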
static int
ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
return ctnetlink_dump_list(skb, cb, &net->ct.dying);
}
static int
ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = ctnetlink_dump_dying,
.done = ctnetlink_done_list,
};
return netlink_dump_start(ctnl, skb, nlh, &c);
}
return -EOPNOTSUPP;
}
static int
ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed);
}
static int
ctnetlink_get_ct_unconfirmed(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = ctnetlink_dump_unconfirmed,
.done = ctnetlink_done_list,
};
return netlink_dump_start(ctnl, skb, nlh, &c);
}
return -EOPNOTSUPP;
}
#ifdef CONFIG_NF_NAT_NEEDED
static int
ctnetlink_parse_nat_setup(struct nf_conn *ct,
@@ -2216,7 +2325,8 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_MASK] = { .type = NLA_NESTED },
[CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
[CTA_EXPECT_ID] = { .type = NLA_U32 },
[CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING },
[CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
[CTA_EXPECT_ZONE] = { .type = NLA_U16 },
[CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
[CTA_EXPECT_CLASS] = { .type = NLA_U32 },
@@ -2712,6 +2822,8 @@ static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
.policy = ct_nla_policy },
[IPCTNL_MSG_CT_GET_STATS_CPU] = { .call = ctnetlink_stat_ct_cpu },
[IPCTNL_MSG_CT_GET_STATS] = { .call = ctnetlink_stat_ct },
[IPCTNL_MSG_CT_GET_DYING] = { .call = ctnetlink_get_ct_dying },
[IPCTNL_MSG_CT_GET_UNCONFIRMED] = { .call = ctnetlink_get_ct_unconfirmed },
};
static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
@@ -1353,6 +1353,8 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
[CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 },
[CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 },
[CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 },
[CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 },
[CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 },
};
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
@@ -14,84 +14,32 @@
#include "nf_internals.h"
/*
* A queue handler may be registered for each protocol. Each is protected by
* long term mutex. The handler must provide an outfn() to accept packets
* for queueing and must reinject all packets it receives, no matter what.
* Hook for nfnetlink_queue to register its queue handler.
* We do this so that most of the NFQUEUE code can be modular.
*
* Once the queue is registered it must reinject all packets it
* receives, no matter what.
*/
static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
static DEFINE_MUTEX(queue_handler_mutex);
static const struct nf_queue_handler __rcu *queue_handler __read_mostly;
/* return EBUSY when somebody else is registered, return EEXIST if the
* same handler is registered, return 0 in case of success. */
int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
void nf_register_queue_handler(const struct nf_queue_handler *qh)
{
int ret;
const struct nf_queue_handler *old;
if (pf >= ARRAY_SIZE(queue_handler))
return -EINVAL;
mutex_lock(&queue_handler_mutex);
old = rcu_dereference_protected(queue_handler[pf],
lockdep_is_held(&queue_handler_mutex));
if (old == qh)
ret = -EEXIST;
else if (old)
ret = -EBUSY;
else {
rcu_assign_pointer(queue_handler[pf], qh);
ret = 0;
}
mutex_unlock(&queue_handler_mutex);
return ret;
/* should never happen, we only have one queueing backend in kernel */
WARN_ON(rcu_access_pointer(queue_handler));
rcu_assign_pointer(queue_handler, qh);
}
EXPORT_SYMBOL(nf_register_queue_handler);
/* The caller must flush their queue before this */
int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
void nf_unregister_queue_handler(void)
{
const struct nf_queue_handler *old;
if (pf >= ARRAY_SIZE(queue_handler))
return -EINVAL;
mutex_lock(&queue_handler_mutex);
old = rcu_dereference_protected(queue_handler[pf],
lockdep_is_held(&queue_handler_mutex));
if (old && old != qh) {
mutex_unlock(&queue_handler_mutex);
return -EINVAL;
}
RCU_INIT_POINTER(queue_handler[pf], NULL);
mutex_unlock(&queue_handler_mutex);
RCU_INIT_POINTER(queue_handler, NULL);
synchronize_rcu();
return 0;
}
EXPORT_SYMBOL(nf_unregister_queue_handler);
void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
{
u_int8_t pf;
mutex_lock(&queue_handler_mutex);
for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) {
if (rcu_dereference_protected(
queue_handler[pf],
lockdep_is_held(&queue_handler_mutex)
) == qh)
RCU_INIT_POINTER(queue_handler[pf], NULL);
}
mutex_unlock(&queue_handler_mutex);
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
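The replacement is the classic RCU publish/retire pattern for a single global pointer: the writer publishes with rcu_assign_pointer(), readers sample the pointer under rcu_read_lock() via rcu_dereference() (as __nf_queue() does below), and unregistration NULLs the pointer and waits in synchronize_rcu() so no reader can still be using the stale handler. Condensed (a recap of the code above, not new API):

/* Writer side (un/registration, serialized by module load/unload): */
rcu_assign_pointer(queue_handler, qh);	/* publish */
/* ... */
RCU_INIT_POINTER(queue_handler, NULL);	/* retire */
synchronize_rcu();			/* wait out all readers */

/* Reader side (packet path): */
rcu_read_lock();
qh = rcu_dereference(queue_handler);
if (qh)
	qh->outfn(entry, queuenum);
rcu_read_unlock();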
static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
/* Release those devices we held, or Alexey will kill me. */
@@ -137,7 +85,7 @@ static int __nf_queue(struct sk_buff *skb,
/* QUEUE == DROP if no one is waiting, to be safe. */
rcu_read_lock();
qh = rcu_dereference(queue_handler[pf]);
qh = rcu_dereference(queue_handler);
if (!qh) {
status = -ESRCH;
goto err_unlock;
@@ -344,77 +292,3 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
kfree(entry);
}
EXPORT_SYMBOL(nf_reinject);
#ifdef CONFIG_PROC_FS
static void *seq_start(struct seq_file *seq, loff_t *pos)
{
if (*pos >= ARRAY_SIZE(queue_handler))
return NULL;
return pos;
}
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
(*pos)++;
if (*pos >= ARRAY_SIZE(queue_handler))
return NULL;
return pos;
}
static void seq_stop(struct seq_file *s, void *v)
{
}
static int seq_show(struct seq_file *s, void *v)
{
int ret;
loff_t *pos = v;
const struct nf_queue_handler *qh;
rcu_read_lock();
qh = rcu_dereference(queue_handler[*pos]);
if (!qh)
ret = seq_printf(s, "%2lld NONE\n", *pos);
else
ret = seq_printf(s, "%2lld %s\n", *pos, qh->name);
rcu_read_unlock();
return ret;
}
static const struct seq_operations nfqueue_seq_ops = {
.start = seq_start,
.next = seq_next,
.stop = seq_stop,
.show = seq_show,
};
static int nfqueue_open(struct inode *inode, struct file *file)
{
return seq_open(file, &nfqueue_seq_ops);
}
static const struct file_operations nfqueue_file_ops = {
.owner = THIS_MODULE,
.open = nfqueue_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#endif /* PROC_FS */
int __init netfilter_queue_init(void)
{
#ifdef CONFIG_PROC_FS
if (!proc_create("nf_queue", S_IRUGO,
proc_net_netfilter, &nfqueue_file_ops))
return -1;
#endif
return 0;
}
@@ -809,7 +809,6 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
};
static const struct nf_queue_handler nfqh = {
.name = "nf_queue",
.outfn = &nfqnl_enqueue_packet,
};
@@ -827,14 +826,10 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
if (nfqa[NFQA_CFG_CMD]) {
cmd = nla_data(nfqa[NFQA_CFG_CMD]);
/* Commands without queue context - might sleep */
/* Obsolete commands without queue context */
switch (cmd->command) {
case NFQNL_CFG_CMD_PF_BIND:
return nf_register_queue_handler(ntohs(cmd->pf),
&nfqh);
case NFQNL_CFG_CMD_PF_UNBIND:
return nf_unregister_queue_handler(ntohs(cmd->pf),
&nfqh);
case NFQNL_CFG_CMD_PF_BIND: return 0;
case NFQNL_CFG_CMD_PF_UNBIND: return 0;
}
}
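Userspace compatibility note: libnetfilter_queue applications traditionally issue the PF unbind/bind dance before creating a queue; on kernels with this change those calls simply return success, while remaining meaningful on older kernels. A minimal (hedged) accept-everything consumer for queue 0:

#include <stdlib.h>
#include <stdint.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netfilter.h>
#include <libnetfilter_queue/libnetfilter_queue.h>

static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
	      struct nfq_data *nfa, void *data)
{
	struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
	uint32_t id = ph ? ntohl(ph->packet_id) : 0;

	return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL); /* pass all */
}

int main(void)
{
	char buf[4096];
	struct nfq_handle *h = nfq_open();
	struct nfq_q_handle *qh;
	int fd, n;

	if (!h)
		exit(EXIT_FAILURE);
	/* No-ops on kernels with this change, but kept because older
	 * kernels required the PF (un)bind dance. */
	nfq_unbind_pf(h, AF_INET);
	nfq_bind_pf(h, AF_INET);

	qh = nfq_create_queue(h, 0, &cb, NULL);
	if (!qh || nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff) < 0)
		exit(EXIT_FAILURE);

	fd = nfq_fd(h);
	while ((n = recv(fd, buf, sizeof(buf), 0)) > 0)
		nfq_handle_packet(h, buf, n);

	nfq_destroy_queue(qh);
	nfq_close(h);
	return 0;
}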
@@ -1074,6 +1069,7 @@ static int __init nfnetlink_queue_init(void)
#endif
register_netdevice_notifier(&nfqnl_dev_notifier);
nf_register_queue_handler(&nfqh);
return status;
#ifdef CONFIG_PROC_FS
@@ -1087,7 +1083,7 @@ static void __exit nfnetlink_queue_fini(void)
static void __exit nfnetlink_queue_fini(void)
{
nf_unregister_queue_handlers(&nfqh);
nf_unregister_queue_handler();
unregister_netdevice_notifier(&nfqnl_dev_notifier);
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_queue", proc_net_netfilter);