Commit da134825 authored by David S. Miller's avatar David S. Miller

Merge branch 'master' of git://1984.lsi.us.es/nf-next

Pablo Neira Ayuso says:

====================
The following patchset contains Netfilter/IPVS updates for
your net-next tree, they are:

* Better performance in nfnetlink_queue by avoiding copy from the
  packet to netlink message, from Eric Dumazet.

* Remove unnecessary locking in the exit path of ebt_ulog, from Gao Feng.

* Use new function ipv6_iface_scope_id in nf_ct_ipv6, from Hannes Frederic Sowa.

* A couple of sparse fixes for IPVS, from Julian Anastasov.

* Use xor hashing in nfnetlink_queue, as suggested by Eric Dumazet, from
  myself.

* Allow to dump expectations per master conntrack via ctnetlink, from myself.

* A couple of cleanups to use PTR_RET in module init path, from Silviu-Mihai
  Popescu.

* Remove nf_conntrack module a bit faster if netns are in use, from
  Vladimir Davydov.

* Use checksum_partial in ip6t_NPT, from YOSHIFUJI Hideaki.

* Sparse fix for nf_conntrack, from Stephen Hemminger.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents f5a03cf4 dece40e8
......@@ -459,7 +459,7 @@ struct ip_vs_estimator {
struct ip_vs_stats {
struct ip_vs_stats_user ustats; /* statistics */
struct ip_vs_estimator est; /* estimator */
struct ip_vs_cpu_stats *cpustats; /* per cpu counters */
struct ip_vs_cpu_stats __percpu *cpustats; /* per cpu counters */
spinlock_t lock; /* spin lock */
struct ip_vs_stats_user ustats0; /* reset values */
};
......
......@@ -27,6 +27,7 @@ extern unsigned int nf_conntrack_in(struct net *net,
extern int nf_conntrack_init_net(struct net *net);
extern void nf_conntrack_cleanup_net(struct net *net);
extern void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list);
extern int nf_conntrack_proto_pernet_init(struct net *net);
extern void nf_conntrack_proto_pernet_fini(struct net *net);
......
......@@ -319,12 +319,11 @@ static void __exit ebt_ulog_fini(void)
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
ub = &ulog_buffers[i];
del_timer(&ub->timer);
spin_lock_bh(&ub->lock);
if (ub->skb) {
kfree_skb(ub->skb);
ub->skb = NULL;
}
spin_unlock_bh(&ub->lock);
}
netlink_kernel_release(ebtulognl);
}
......
......@@ -64,9 +64,7 @@ static int ebt_broute(struct sk_buff *skb)
static int __net_init broute_net_init(struct net *net)
{
net->xt.broute_table = ebt_register_table(net, &broute_table);
if (IS_ERR(net->xt.broute_table))
return PTR_ERR(net->xt.broute_table);
return 0;
return PTR_RET(net->xt.broute_table);
}
static void __net_exit broute_net_exit(struct net *net)
......
......@@ -48,9 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net)
net->ipv4.arptable_filter =
arpt_register_table(net, &packet_filter, repl);
kfree(repl);
if (IS_ERR(net->ipv4.arptable_filter))
return PTR_ERR(net->ipv4.arptable_filter);
return 0;
return PTR_RET(net->ipv4.arptable_filter);
}
static void __net_exit arptable_filter_net_exit(struct net *net)
......
......@@ -18,9 +18,8 @@
static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
{
struct ip6t_npt_tginfo *npt = par->targinfo;
__wsum src_sum = 0, dst_sum = 0;
struct in6_addr pfx;
unsigned int i;
__wsum src_sum, dst_sum;
if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
return -EINVAL;
......@@ -33,12 +32,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
src_sum = csum_add(src_sum,
(__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
dst_sum = csum_add(dst_sum,
(__force __wsum)npt->dst_pfx.in6.s6_addr16[i]);
}
src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0);
dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0);
npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
return 0;
......
......@@ -330,12 +330,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
sizeof(sin6.sin6_addr));
nf_ct_put(ct);
if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin6.sin6_scope_id = sk->sk_bound_dev_if;
else
sin6.sin6_scope_id = 0;
sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr,
sk->sk_bound_dev_if);
return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
}
......
......@@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put);
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
int ip_vs_net_id __read_mostly;
#ifdef IP_VS_GENERIC_NETNS
EXPORT_SYMBOL(ip_vs_net_id);
#endif
static int ip_vs_net_id __read_mostly;
/* netns cnt used for uniqueness */
static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
......@@ -1181,9 +1178,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
struct net *net =
dev_net(skb_dst(skb)->dev);
if (!skb->dev)
skb->dev = net->loopback_dev;
icmpv6_send(skb,
......
......@@ -271,16 +271,18 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
{
register unsigned int porth = ntohs(port);
__be32 addr_fold = addr->ip;
__u32 ahash;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
#endif
addr_fold ^= ((size_t)net>>8);
ahash = ntohl(addr_fold);
ahash ^= ((size_t) net >> 8);
return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
& IP_VS_SVC_TAB_MASK;
return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
IP_VS_SVC_TAB_MASK;
}
/*
......
......@@ -56,7 +56,7 @@
* Make a summary from each cpu
*/
static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
struct ip_vs_cpu_stats *stats)
struct ip_vs_cpu_stats __percpu *stats)
{
int i;
......
......@@ -48,6 +48,7 @@
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#define NF_CONNTRACK_VERSION "0.5.0"
......@@ -1364,30 +1365,48 @@ void nf_conntrack_cleanup_end(void)
*/
void nf_conntrack_cleanup_net(struct net *net)
{
LIST_HEAD(single);
list_add(&net->exit_list, &single);
nf_conntrack_cleanup_net_list(&single);
}
void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
{
int busy;
struct net *net;
/*
* This makes sure all current packets have passed through
* netfilter framework. Roll on, two-stage module
* delete...
*/
synchronize_net();
i_see_dead_people:
nf_ct_iterate_cleanup(net, kill_all, NULL);
nf_ct_release_dying_list(net);
if (atomic_read(&net->ct.count) != 0) {
i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
nf_ct_iterate_cleanup(net, kill_all, NULL);
nf_ct_release_dying_list(net);
if (atomic_read(&net->ct.count) != 0)
busy = 1;
}
if (busy) {
schedule();
goto i_see_dead_people;
}
nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
nf_conntrack_proto_pernet_fini(net);
nf_conntrack_helper_pernet_fini(net);
nf_conntrack_ecache_pernet_fini(net);
nf_conntrack_tstamp_pernet_fini(net);
nf_conntrack_acct_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
kmem_cache_destroy(net->ct.nf_conntrack_cachep);
kfree(net->ct.slabname);
free_percpu(net->ct.stat);
list_for_each_entry(net, net_exit_list, exit_list) {
nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
nf_conntrack_proto_pernet_fini(net);
nf_conntrack_helper_pernet_fini(net);
nf_conntrack_ecache_pernet_fini(net);
nf_conntrack_tstamp_pernet_fini(net);
nf_conntrack_acct_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
kmem_cache_destroy(net->ct.nf_conntrack_cachep);
kfree(net->ct.slabname);
free_percpu(net->ct.stat);
}
}
void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
......
......@@ -2409,6 +2409,92 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
static int
ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nf_conntrack_expect *exp, *last;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
struct nf_conn *ct = cb->data;
struct nf_conn_help *help = nfct_help(ct);
u_int8_t l3proto = nfmsg->nfgen_family;
if (cb->args[0])
return 0;
rcu_read_lock();
last = (struct nf_conntrack_expect *)cb->args[1];
restart:
hlist_for_each_entry(exp, &help->expectations, lnode) {
if (l3proto && exp->tuple.src.l3num != l3proto)
continue;
if (cb->args[1]) {
if (exp != last)
continue;
cb->args[1] = 0;
}
if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
if (!atomic_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
}
}
if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
}
cb->args[0] = 1;
out:
rcu_read_unlock();
if (last)
nf_ct_expect_put(last);
return skb->len;
}
static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
int err;
struct net *net = sock_net(ctnl);
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
u16 zone = 0;
struct netlink_dump_control c = {
.dump = ctnetlink_exp_ct_dump_table,
.done = ctnetlink_exp_done,
};
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
if (err < 0)
return err;
if (cda[CTA_EXPECT_ZONE]) {
err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
if (err < 0)
return err;
}
h = nf_conntrack_find_get(net, zone, &tuple);
if (!h)
return -ENOENT;
ct = nf_ct_tuplehash_to_ctrack(h);
c.data = ct;
err = netlink_dump_start(ctnl, skb, nlh, &c);
nf_ct_put(ct);
return err;
}
static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
[CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
......@@ -2439,11 +2525,15 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
int err;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = ctnetlink_exp_dump_table,
.done = ctnetlink_exp_done,
};
return netlink_dump_start(ctnl, skb, nlh, &c);
if (cda[CTA_EXPECT_MASTER])
return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda);
else {
struct netlink_dump_control c = {
.dump = ctnetlink_exp_dump_table,
.done = ctnetlink_exp_done,
};
return netlink_dump_start(ctnl, skb, nlh, &c);
}
}
err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
......
......@@ -545,16 +545,20 @@ static int nf_conntrack_pernet_init(struct net *net)
return ret;
}
static void nf_conntrack_pernet_exit(struct net *net)
static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
{
nf_conntrack_standalone_fini_sysctl(net);
nf_conntrack_standalone_fini_proc(net);
nf_conntrack_cleanup_net(net);
struct net *net;
list_for_each_entry(net, net_exit_list, exit_list) {
nf_conntrack_standalone_fini_sysctl(net);
nf_conntrack_standalone_fini_proc(net);
}
nf_conntrack_cleanup_net_list(net_exit_list);
}
static struct pernet_operations nf_conntrack_net_ops = {
.init = nf_conntrack_pernet_init,
.exit = nf_conntrack_pernet_exit,
.init = nf_conntrack_pernet_init,
.exit_batch = nf_conntrack_pernet_exit,
};
static int __init nf_conntrack_standalone_init(void)
......
......@@ -73,7 +73,7 @@ static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
static inline u_int8_t instance_hashfn(u_int16_t queue_num)
{
return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS;
return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
}
static struct nfqnl_instance *
......@@ -217,14 +217,59 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
spin_unlock_bh(&queue->lock);
}
static void
nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
{
int i, j = 0;
int plen = 0; /* length of skb->head fragment */
struct page *page;
unsigned int offset;
/* dont bother with small payloads */
if (len <= skb_tailroom(to)) {
skb_copy_bits(from, 0, skb_put(to, len), len);
return;
}
if (hlen) {
skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
len -= hlen;
} else {
plen = min_t(int, skb_headlen(from), len);
if (plen) {
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
get_page(page);
j = 1;
len -= plen;
}
}
to->truesize += len + plen;
to->len += len + plen;
to->data_len += len + plen;
for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
if (!len)
break;
skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
len -= skb_shinfo(to)->frags[j].size;
skb_frag_ref(to, j);
j++;
}
skb_shinfo(to)->nr_frags = j;
}
static struct sk_buff *
nfqnl_build_packet_message(struct nfqnl_instance *queue,
struct nf_queue_entry *entry,
__be32 **packet_id_ptr)
{
sk_buff_data_t old_tail;
size_t size;
size_t data_len = 0, cap_len = 0;
int hlen = 0;
struct sk_buff *skb;
struct nlattr *nla;
struct nfqnl_msg_packet_hdr *pmsg;
......@@ -246,8 +291,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
#endif
+ nla_total_size(sizeof(u_int32_t)) /* mark */
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
+ nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)
+ nla_total_size(sizeof(u_int32_t))); /* cap_len */
+ nla_total_size(sizeof(u_int32_t)); /* cap_len */
if (entskb->tstamp.tv64)
size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
outdev = entry->outdev;
......@@ -265,7 +312,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
if (data_len == 0 || data_len > entskb->len)
data_len = entskb->len;
size += nla_total_size(data_len);
if (!entskb->head_frag ||
skb_headlen(entskb) < L1_CACHE_BYTES ||
skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS)
hlen = skb_headlen(entskb);
if (skb_has_frag_list(entskb))
hlen = entskb->len;
hlen = min_t(int, data_len, hlen);
size += sizeof(struct nlattr) + hlen;
cap_len = entskb->len;
break;
}
......@@ -277,7 +333,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
if (!skb)
return NULL;
old_tail = skb->tail;
nlh = nlmsg_put(skb, 0, 0,
NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
sizeof(struct nfgenmsg), 0);
......@@ -382,31 +437,26 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
goto nla_put_failure;
}
if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
goto nla_put_failure;
if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
goto nla_put_failure;
if (data_len) {
struct nlattr *nla;
int sz = nla_attr_size(data_len);
if (skb_tailroom(skb) < nla_total_size(data_len)) {
printk(KERN_WARNING "nf_queue: no tailroom!\n");
kfree_skb(skb);
return NULL;
}
if (skb_tailroom(skb) < sizeof(*nla) + hlen)
goto nla_put_failure;
nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len));
nla = (struct nlattr *)skb_put(skb, sizeof(*nla));
nla->nla_type = NFQA_PAYLOAD;
nla->nla_len = sz;
nla->nla_len = nla_attr_size(data_len);
if (skb_copy_bits(entskb, 0, nla_data(nla), data_len))
BUG();
nfqnl_zcopy(skb, entskb, data_len, hlen);
}
if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
goto nla_put_failure;
if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
goto nla_put_failure;
nlh->nlmsg_len = skb->tail - old_tail;
nlh->nlmsg_len = skb->len;
return skb;
nla_put_failure:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment