Commit d85427e3 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Rewrite inner header IPv6 in ICMPv6 messages in ip6t_NPT,
   from Michael Zhou.

2) do_ip_vs_set_ctl() dereferences uninitialized value,
   from Peilin Ye.

3) Support for userdata in tables, from Jose M. Guisado.

4) Do not increment ct error and invalid stats at the same time,
   from Florian Westphal.

5) Remove ct ignore stats, also from Florian.

6) Add ct stats for clash resolution, from Florian Westphal.

7) Bump reference counter bump on ct clash resolution only,
   this is safe because bucket lock is held, again from Florian.

8) Use ip_is_fragment() in xt_HMARK, from YueHaibing.

9) Add wildcard support for nft_socket, from Balazs Scheidler.

10) Remove superfluous IPVS dependency on iptables, from
    Yaroslav Bolyukin.

11) Remove unused definition in ebt_stp, from Wang Hai.

12) Replace CONFIG_NFT_CHAIN_NAT_{IPV4,IPV6} by CONFIG_NFT_NAT
    in selftests/net, from Fabian Frederick.

13) Add userdata support for nft_object, from Jose M. Guisado.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents ac99a822 b131c964
......@@ -8,9 +8,9 @@
struct ip_conntrack_stat {
unsigned int found;
unsigned int invalid;
unsigned int ignore;
unsigned int insert;
unsigned int insert_failed;
unsigned int clash_resolve;
unsigned int drop;
unsigned int early_drop;
unsigned int error;
......
......@@ -25,9 +25,6 @@
#include <linux/ip.h>
#include <linux/ipv6.h> /* for struct ipv6hdr */
#include <net/ipv6.h>
#if IS_ENABLED(CONFIG_IP_VS_IPV6)
#include <linux/netfilter_ipv6/ip6_tables.h>
#endif
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#endif
......
......@@ -1082,6 +1082,8 @@ struct nft_table {
flags:8,
genmask:2;
char *name;
u16 udlen;
u8 *udata;
};
void nft_register_chain_type(const struct nft_chain_type *);
......@@ -1123,6 +1125,8 @@ struct nft_object {
u32 genmask:2,
use:30;
u64 handle;
u16 udlen;
u8 *udata;
/* runtime data below here */
const struct nft_object_ops *ops ____cacheline_aligned;
unsigned char data[]
......
......@@ -172,6 +172,7 @@ enum nft_table_flags {
* @NFTA_TABLE_NAME: name of the table (NLA_STRING)
* @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
* @NFTA_TABLE_USE: number of chains in this table (NLA_U32)
* @NFTA_TABLE_USERDATA: user data (NLA_BINARY)
*/
enum nft_table_attributes {
NFTA_TABLE_UNSPEC,
......@@ -180,6 +181,7 @@ enum nft_table_attributes {
NFTA_TABLE_USE,
NFTA_TABLE_HANDLE,
NFTA_TABLE_PAD,
NFTA_TABLE_USERDATA,
__NFTA_TABLE_MAX
};
#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1)
......@@ -1008,10 +1010,12 @@ enum nft_socket_attributes {
*
* @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option
* @NFT_SOCKET_MARK: Value of the socket mark
* @NFT_SOCKET_WILDCARD: Whether the socket is zero-bound (e.g. 0.0.0.0 or ::0)
*/
enum nft_socket_keys {
NFT_SOCKET_TRANSPARENT,
NFT_SOCKET_MARK,
NFT_SOCKET_WILDCARD,
__NFT_SOCKET_MAX
};
#define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1)
......@@ -1555,6 +1559,7 @@ enum nft_ct_expectation_attributes {
* @NFTA_OBJ_DATA: stateful object data (NLA_NESTED)
* @NFTA_OBJ_USE: number of references to this expression (NLA_U32)
* @NFTA_OBJ_HANDLE: object handle (NLA_U64)
* @NFTA_OBJ_USERDATA: user data (NLA_BINARY)
*/
enum nft_object_attributes {
NFTA_OBJ_UNSPEC,
......@@ -1565,6 +1570,7 @@ enum nft_object_attributes {
NFTA_OBJ_USE,
NFTA_OBJ_HANDLE,
NFTA_OBJ_PAD,
NFTA_OBJ_USERDATA,
__NFTA_OBJ_MAX
};
#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1)
......
......@@ -247,7 +247,7 @@ enum ctattr_stats_cpu {
CTA_STATS_FOUND,
CTA_STATS_NEW, /* no longer used */
CTA_STATS_INVALID,
CTA_STATS_IGNORE,
CTA_STATS_IGNORE, /* no longer used */
CTA_STATS_DELETE, /* no longer used */
CTA_STATS_DELETE_LIST, /* no longer used */
CTA_STATS_INSERT,
......@@ -256,6 +256,7 @@ enum ctattr_stats_cpu {
CTA_STATS_EARLY_DROP,
CTA_STATS_ERROR,
CTA_STATS_SEARCH_RESTART,
CTA_STATS_CLASH_RESOLVE,
__CTA_STATS_MAX,
};
#define CTA_STATS_MAX (__CTA_STATS_MAX - 1)
......
......@@ -15,7 +15,6 @@
#include <linux/netfilter_bridge/ebt_stp.h>
#define BPDU_TYPE_CONFIG 0
#define BPDU_TYPE_TCN 0x80
struct stp_header {
u8 dsap;
......
......@@ -77,16 +77,43 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
return true;
}
static struct ipv6hdr *icmpv6_bounced_ipv6hdr(struct sk_buff *skb,
struct ipv6hdr *_bounced_hdr)
{
if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
return NULL;
if (!icmpv6_is_err(icmp6_hdr(skb)->icmp6_type))
return NULL;
return skb_header_pointer(skb,
skb_transport_offset(skb) + sizeof(struct icmp6hdr),
sizeof(struct ipv6hdr),
_bounced_hdr);
}
static unsigned int
ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ip6t_npt_tginfo *npt = par->targinfo;
struct ipv6hdr _bounced_hdr;
struct ipv6hdr *bounced_hdr;
struct in6_addr bounced_pfx;
if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) {
icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
offsetof(struct ipv6hdr, saddr));
return NF_DROP;
}
/* rewrite dst addr of bounced packet which was sent to dst range */
bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr);
if (bounced_hdr) {
ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->daddr, npt->src_pfx_len);
if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0)
ip6t_npt_map_pfx(npt, &bounced_hdr->daddr);
}
return XT_CONTINUE;
}
......@@ -94,12 +121,24 @@ static unsigned int
ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ip6t_npt_tginfo *npt = par->targinfo;
struct ipv6hdr _bounced_hdr;
struct ipv6hdr *bounced_hdr;
struct in6_addr bounced_pfx;
if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) {
icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
offsetof(struct ipv6hdr, daddr));
return NF_DROP;
}
/* rewrite src addr of bounced packet which was sent from dst range */
bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr);
if (bounced_hdr) {
ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->saddr, npt->src_pfx_len);
if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0)
ip6t_npt_map_pfx(npt, &bounced_hdr->saddr);
}
return XT_CONTINUE;
}
......
......@@ -29,7 +29,6 @@ if IP_VS
config IP_VS_IPV6
bool "IPv6 support for IPVS"
depends on IPV6 = y || IP_VS = IPV6
select IP6_NF_IPTABLES
select NF_DEFRAG_IPV6
help
Add IPv6 support to IPVS.
......
......@@ -2508,6 +2508,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len)
/* Set timeout values for (tcp tcpfin udp) */
ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
goto out_unlock;
} else if (!len) {
/* No more commands with len == 0 below */
ret = -EINVAL;
goto out_unlock;
}
usvc_compat = (struct ip_vs_service_user *)arg;
......@@ -2584,9 +2588,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len)
break;
case IP_VS_SO_SET_DELDEST:
ret = ip_vs_del_dest(svc, &udest);
break;
default:
ret = -EINVAL;
}
out_unlock:
......
......@@ -859,7 +859,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
out:
nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert_failed);
local_bh_enable();
return -EEXIST;
}
......@@ -909,6 +908,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
tstamp->start = ktime_get_real_ns();
}
/* caller must hold locks to prevent concurrent changes */
static int __nf_ct_resolve_clash(struct sk_buff *skb,
struct nf_conntrack_tuple_hash *h)
{
......@@ -922,23 +922,21 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
if (nf_ct_is_dying(ct))
return NF_DROP;
if (!atomic_inc_not_zero(&ct->ct_general.use))
return NF_DROP;
if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
nf_ct_match(ct, loser_ct)) {
struct net *net = nf_ct_net(ct);
nf_conntrack_get(&ct->ct_general);
nf_ct_acct_merge(ct, ctinfo, loser_ct);
nf_ct_add_to_dying_list(loser_ct);
nf_conntrack_put(&loser_ct->ct_general);
nf_ct_set(skb, ct, ctinfo);
NF_CT_STAT_INC(net, insert_failed);
NF_CT_STAT_INC(net, clash_resolve);
return NF_ACCEPT;
}
nf_ct_put(ct);
return NF_DROP;
}
......@@ -998,6 +996,8 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
&nf_conntrack_hash[repl_idx]);
NF_CT_STAT_INC(net, clash_resolve);
return NF_ACCEPT;
}
......@@ -1027,10 +1027,10 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
*
* Failing that, the new, unconfirmed conntrack is still added to the table
* provided that the collision only occurs in the ORIGINAL direction.
* The new entry will be added after the existing one in the hash list,
* The new entry will be added only in the non-clashing REPLY direction,
* so packets in the ORIGINAL direction will continue to match the existing
* entry. The new entry will also have a fixed timeout so it expires --
* due to the collision, it will not see bidirectional traffic.
* due to the collision, it will only see reply traffic.
*
* Returns NF_DROP if the clash could not be resolved.
*/
......@@ -1725,10 +1725,8 @@ nf_conntrack_handle_icmp(struct nf_conn *tmpl,
else
return NF_ACCEPT;
if (ret <= 0) {
if (ret <= 0)
NF_CT_STAT_INC_ATOMIC(state->net, error);
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
}
return ret;
}
......@@ -1802,10 +1800,8 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
if (tmpl || ctinfo == IP_CT_UNTRACKED) {
/* Previously seen (loopback or untracked)? Ignore. */
if ((tmpl && !nf_ct_is_template(tmpl)) ||
ctinfo == IP_CT_UNTRACKED) {
NF_CT_STAT_INC_ATOMIC(state->net, ignore);
ctinfo == IP_CT_UNTRACKED)
return NF_ACCEPT;
}
skb->_nfct = 0;
}
......@@ -1813,7 +1809,6 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum);
if (dataoff <= 0) {
pr_debug("not prepared to track yet or error occurred\n");
NF_CT_STAT_INC_ATOMIC(state->net, error);
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
ret = NF_ACCEPT;
goto out;
......
......@@ -2509,7 +2509,6 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) ||
nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) ||
nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) ||
nla_put_be32(skb, CTA_STATS_INSERT_FAILED,
htonl(st->insert_failed)) ||
......@@ -2517,7 +2516,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
nla_put_be32(skb, CTA_STATS_EARLY_DROP, htonl(st->early_drop)) ||
nla_put_be32(skb, CTA_STATS_ERROR, htonl(st->error)) ||
nla_put_be32(skb, CTA_STATS_SEARCH_RESTART,
htonl(st->search_restart)))
htonl(st->search_restart)) ||
nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE,
htonl(st->clash_resolve)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
......
......@@ -435,11 +435,11 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
"%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
nr_conntracks,
0,
st->clash_resolve, /* was: searched */
st->found,
0,
st->invalid,
st->ignore,
0,
0,
0,
st->insert,
......
......@@ -650,6 +650,8 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
[NFTA_TABLE_HANDLE] = { .type = NLA_U64 },
[NFTA_TABLE_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN }
};
static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
......@@ -676,6 +678,11 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
NFTA_TABLE_PAD))
goto nla_put_failure;
if (table->udata) {
if (nla_put(skb, NFTA_TABLE_USERDATA, table->udlen, table->udata))
goto nla_put_failure;
}
nlmsg_end(skb, nlh);
return 0;
......@@ -977,8 +984,9 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
int family = nfmsg->nfgen_family;
const struct nlattr *attr;
struct nft_table *table;
u32 flags = 0;
struct nft_ctx ctx;
u32 flags = 0;
u16 udlen = 0;
int err;
lockdep_assert_held(&net->nft.commit_mutex);
......@@ -1014,6 +1022,16 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (table->name == NULL)
goto err_strdup;
if (nla[NFTA_TABLE_USERDATA]) {
udlen = nla_len(nla[NFTA_TABLE_USERDATA]);
table->udata = kzalloc(udlen, GFP_KERNEL);
if (table->udata == NULL)
goto err_table_udata;
nla_memcpy(table->udata, nla[NFTA_TABLE_USERDATA], udlen);
table->udlen = udlen;
}
err = rhltable_init(&table->chains_ht, &nft_chain_ht_params);
if (err)
goto err_chain_ht;
......@@ -1036,6 +1054,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
err_trans:
rhltable_destroy(&table->chains_ht);
err_chain_ht:
kfree(table->udata);
err_table_udata:
kfree(table->name);
err_strdup:
kfree(table);
......@@ -5730,6 +5750,8 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
[NFTA_OBJ_TYPE] = { .type = NLA_U32 },
[NFTA_OBJ_DATA] = { .type = NLA_NESTED },
[NFTA_OBJ_HANDLE] = { .type = NLA_U64},
[NFTA_OBJ_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
};
static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
......@@ -5877,6 +5899,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
struct nft_object *obj;
struct nft_ctx ctx;
u32 objtype;
u16 udlen;
int err;
if (!nla[NFTA_OBJ_TYPE] ||
......@@ -5921,7 +5944,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err1;
goto err_init;
}
obj->key.table = table;
obj->handle = nf_tables_alloc_handle(table);
......@@ -5929,32 +5952,44 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
if (!obj->key.name) {
err = -ENOMEM;
goto err2;
goto err_strdup;
}
if (nla[NFTA_OBJ_USERDATA]) {
udlen = nla_len(nla[NFTA_OBJ_USERDATA]);
obj->udata = kzalloc(udlen, GFP_KERNEL);
if (obj->udata == NULL)
goto err_userdata;
nla_memcpy(obj->udata, nla[NFTA_OBJ_USERDATA], udlen);
obj->udlen = udlen;
}
err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
if (err < 0)
goto err3;
goto err_trans;
err = rhltable_insert(&nft_objname_ht, &obj->rhlhead,
nft_objname_ht_params);
if (err < 0)
goto err4;
goto err_obj_ht;
list_add_tail_rcu(&obj->list, &table->objects);
table->use++;
return 0;
err4:
err_obj_ht:
/* queued in transaction log */
INIT_LIST_HEAD(&obj->list);
return err;
err3:
err_trans:
kfree(obj->key.name);
err2:
err_userdata:
kfree(obj->udata);
err_strdup:
if (obj->ops->destroy)
obj->ops->destroy(&ctx, obj);
kfree(obj);
err1:
err_init:
module_put(type->owner);
return err;
}
......@@ -5986,6 +6021,10 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
NFTA_OBJ_PAD))
goto nla_put_failure;
if (obj->udata &&
nla_put(skb, NFTA_OBJ_USERDATA, obj->udlen, obj->udata))
goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
......
......@@ -14,6 +14,25 @@ struct nft_socket {
};
};
static void nft_socket_wildcard(const struct nft_pktinfo *pkt,
struct nft_regs *regs, struct sock *sk,
u32 *dest)
{
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
nft_reg_store8(dest, inet_sk(sk)->inet_rcv_saddr == 0);
break;
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
case NFPROTO_IPV6:
nft_reg_store8(dest, ipv6_addr_any(&sk->sk_v6_rcv_saddr));
break;
#endif
default:
regs->verdict.code = NFT_BREAK;
return;
}
}
static void nft_socket_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
......@@ -59,6 +78,13 @@ static void nft_socket_eval(const struct nft_expr *expr,
return;
}
break;
case NFT_SOCKET_WILDCARD:
if (!sk_fullsock(sk)) {
regs->verdict.code = NFT_BREAK;
return;
}
nft_socket_wildcard(pkt, regs, sk, dest);
break;
default:
WARN_ON(1);
regs->verdict.code = NFT_BREAK;
......@@ -97,6 +123,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
priv->key = ntohl(nla_get_u32(tb[NFTA_SOCKET_KEY]));
switch(priv->key) {
case NFT_SOCKET_TRANSPARENT:
case NFT_SOCKET_WILDCARD:
len = sizeof(u8);
break;
case NFT_SOCKET_MARK:
......
......@@ -276,7 +276,7 @@ hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
return 0;
/* follow-up fragments don't contain ports, skip all fragments */
if (ip->frag_off & htons(IP_MF | IP_OFFSET))
if (ip_is_fragment(ip))
return 0;
hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info);
......
......@@ -24,8 +24,7 @@ CONFIG_IP_NF_NAT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NFT_NAT=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_NETEM=y
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment