Commit 12ed6015 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

This patchset contains Netfilter fixes for net:

1) Extend selftest to cover flowtable with ipsec, from Florian Westphal.

2) Fix interaction of ipsec with flowtable, also from Florian.

3) User-after-free with bound set to rule that fails to load.

4) Adjust state and timeout for flows that expire.

5) Timeout update race with flows in teardown state.

6) Ensure conntrack id hash calculation use invariants as input,
   from Dirk Morris.

7) Do not push flows into flowtable for TCP fin/rst packets.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 32d3182c dfe42be1
...@@ -421,8 +421,7 @@ struct nft_set { ...@@ -421,8 +421,7 @@ struct nft_set {
unsigned char *udata; unsigned char *udata;
/* runtime data below here */ /* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned; const struct nft_set_ops *ops ____cacheline_aligned;
u16 flags:13, u16 flags:14,
bound:1,
genmask:2; genmask:2;
u8 klen; u8 klen;
u8 dlen; u8 dlen;
...@@ -1348,12 +1347,15 @@ struct nft_trans_rule { ...@@ -1348,12 +1347,15 @@ struct nft_trans_rule {
struct nft_trans_set { struct nft_trans_set {
struct nft_set *set; struct nft_set *set;
u32 set_id; u32 set_id;
bool bound;
}; };
#define nft_trans_set(trans) \ #define nft_trans_set(trans) \
(((struct nft_trans_set *)trans->data)->set) (((struct nft_trans_set *)trans->data)->set)
#define nft_trans_set_id(trans) \ #define nft_trans_set_id(trans) \
(((struct nft_trans_set *)trans->data)->set_id) (((struct nft_trans_set *)trans->data)->set_id)
#define nft_trans_set_bound(trans) \
(((struct nft_trans_set *)trans->data)->bound)
struct nft_trans_chain { struct nft_trans_chain {
bool update; bool update;
...@@ -1384,12 +1386,15 @@ struct nft_trans_table { ...@@ -1384,12 +1386,15 @@ struct nft_trans_table {
struct nft_trans_elem { struct nft_trans_elem {
struct nft_set *set; struct nft_set *set;
struct nft_set_elem elem; struct nft_set_elem elem;
bool bound;
}; };
#define nft_trans_elem_set(trans) \ #define nft_trans_elem_set(trans) \
(((struct nft_trans_elem *)trans->data)->set) (((struct nft_trans_elem *)trans->data)->set)
#define nft_trans_elem(trans) \ #define nft_trans_elem(trans) \
(((struct nft_trans_elem *)trans->data)->elem) (((struct nft_trans_elem *)trans->data)->elem)
#define nft_trans_elem_set_bound(trans) \
(((struct nft_trans_elem *)trans->data)->bound)
struct nft_trans_obj { struct nft_trans_obj {
struct nft_object *obj; struct nft_object *obj;
......
...@@ -453,13 +453,12 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); ...@@ -453,13 +453,12 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
* table location, we assume id gets exposed to userspace. * table location, we assume id gets exposed to userspace.
* *
* Following nf_conn items do not change throughout lifetime * Following nf_conn items do not change throughout lifetime
* of the nf_conn after it has been committed to main hash table: * of the nf_conn:
* *
* 1. nf_conn address * 1. nf_conn address
* 2. nf_conn->ext address * 2. nf_conn->master address (normally NULL)
* 3. nf_conn->master address (normally NULL) * 3. the associated net namespace
* 4. tuple * 4. the original direction tuple
* 5. the associated net namespace
*/ */
u32 nf_ct_get_id(const struct nf_conn *ct) u32 nf_ct_get_id(const struct nf_conn *ct)
{ {
...@@ -469,9 +468,10 @@ u32 nf_ct_get_id(const struct nf_conn *ct) ...@@ -469,9 +468,10 @@ u32 nf_ct_get_id(const struct nf_conn *ct)
net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
a = (unsigned long)ct; a = (unsigned long)ct;
b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); b = (unsigned long)ct->master;
c = (unsigned long)ct->ext; c = (unsigned long)nf_ct_net(ct);
d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple),
&ct_id_seed); &ct_id_seed);
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
......
...@@ -111,15 +111,16 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) ...@@ -111,15 +111,16 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ) #define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ) #define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
static void flow_offload_fixup_ct_state(struct nf_conn *ct) static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
{
return (__s32)(timeout - (u32)jiffies);
}
static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
{ {
const struct nf_conntrack_l4proto *l4proto; const struct nf_conntrack_l4proto *l4proto;
int l4num = nf_ct_protonum(ct);
unsigned int timeout; unsigned int timeout;
int l4num;
l4num = nf_ct_protonum(ct);
if (l4num == IPPROTO_TCP)
flow_offload_fixup_tcp(&ct->proto.tcp);
l4proto = nf_ct_l4proto_find(l4num); l4proto = nf_ct_l4proto_find(l4num);
if (!l4proto) if (!l4proto)
...@@ -132,9 +133,22 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct) ...@@ -132,9 +133,22 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
else else
return; return;
if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
ct->timeout = nfct_time_stamp + timeout; ct->timeout = nfct_time_stamp + timeout;
} }
static void flow_offload_fixup_ct_state(struct nf_conn *ct)
{
if (nf_ct_protonum(ct) == IPPROTO_TCP)
flow_offload_fixup_tcp(&ct->proto.tcp);
}
static void flow_offload_fixup_ct(struct nf_conn *ct)
{
flow_offload_fixup_ct_state(ct);
flow_offload_fixup_ct_timeout(ct);
}
void flow_offload_free(struct flow_offload *flow) void flow_offload_free(struct flow_offload *flow)
{ {
struct flow_offload_entry *e; struct flow_offload_entry *e;
...@@ -208,6 +222,11 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) ...@@ -208,6 +222,11 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
} }
EXPORT_SYMBOL_GPL(flow_offload_add); EXPORT_SYMBOL_GPL(flow_offload_add);
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
return nf_flow_timeout_delta(flow->timeout) <= 0;
}
static void flow_offload_del(struct nf_flowtable *flow_table, static void flow_offload_del(struct nf_flowtable *flow_table,
struct flow_offload *flow) struct flow_offload *flow)
{ {
...@@ -223,6 +242,11 @@ static void flow_offload_del(struct nf_flowtable *flow_table, ...@@ -223,6 +242,11 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
e = container_of(flow, struct flow_offload_entry, flow); e = container_of(flow, struct flow_offload_entry, flow);
clear_bit(IPS_OFFLOAD_BIT, &e->ct->status); clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
if (nf_flow_has_expired(flow))
flow_offload_fixup_ct(e->ct);
else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
flow_offload_fixup_ct_timeout(e->ct);
flow_offload_free(flow); flow_offload_free(flow);
} }
...@@ -298,11 +322,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, ...@@ -298,11 +322,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
return err; return err;
} }
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}
static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
{ {
struct nf_flowtable *flow_table = data; struct nf_flowtable *flow_table = data;
......
...@@ -214,6 +214,25 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) ...@@ -214,6 +214,25 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
return true; return true;
} }
static int nf_flow_offload_dst_check(struct dst_entry *dst)
{
if (unlikely(dst_xfrm(dst)))
return dst_check(dst, 0) ? 0 : -1;
return 0;
}
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
const struct nf_hook_state *state,
struct dst_entry *dst)
{
skb_orphan(skb);
skb_dst_set_noref(skb, dst);
skb->tstamp = 0;
dst_output(state->net, state->sk, skb);
return NF_STOLEN;
}
unsigned int unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state) const struct nf_hook_state *state)
...@@ -254,6 +273,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, ...@@ -254,6 +273,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff)) if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
return NF_ACCEPT; return NF_ACCEPT;
if (nf_flow_offload_dst_check(&rt->dst)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
}
if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0) if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
return NF_DROP; return NF_DROP;
...@@ -261,6 +285,13 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, ...@@ -261,6 +285,13 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
iph = ip_hdr(skb); iph = ip_hdr(skb);
ip_decrease_ttl(iph); ip_decrease_ttl(iph);
if (unlikely(dst_xfrm(&rt->dst))) {
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
IPCB(skb)->iif = skb->dev->ifindex;
IPCB(skb)->flags = IPSKB_FORWARDED;
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
skb->dev = outdev; skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
skb_dst_set_noref(skb, &rt->dst); skb_dst_set_noref(skb, &rt->dst);
...@@ -467,6 +498,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, ...@@ -467,6 +498,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
sizeof(*ip6h))) sizeof(*ip6h)))
return NF_ACCEPT; return NF_ACCEPT;
if (nf_flow_offload_dst_check(&rt->dst)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
}
if (skb_try_make_writable(skb, sizeof(*ip6h))) if (skb_try_make_writable(skb, sizeof(*ip6h)))
return NF_DROP; return NF_DROP;
...@@ -477,6 +513,13 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, ...@@ -477,6 +513,13 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
ip6h = ipv6_hdr(skb); ip6h = ipv6_hdr(skb);
ip6h->hop_limit--; ip6h->hop_limit--;
if (unlikely(dst_xfrm(&rt->dst))) {
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
IP6CB(skb)->iif = skb->dev->ifindex;
IP6CB(skb)->flags = IP6SKB_FORWARDED;
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
skb->dev = outdev; skb->dev = outdev;
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
skb_dst_set_noref(skb, &rt->dst); skb_dst_set_noref(skb, &rt->dst);
......
...@@ -138,9 +138,14 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) ...@@ -138,9 +138,14 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
return; return;
list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
if (trans->msg_type == NFT_MSG_NEWSET && switch (trans->msg_type) {
nft_trans_set(trans) == set) { case NFT_MSG_NEWSET:
set->bound = true; if (nft_trans_set(trans) == set)
nft_trans_set_bound(trans) = true;
break;
case NFT_MSG_NEWSETELEM:
if (nft_trans_elem_set(trans) == set)
nft_trans_elem_set_bound(trans) = true;
break; break;
} }
} }
...@@ -6906,7 +6911,7 @@ static int __nf_tables_abort(struct net *net) ...@@ -6906,7 +6911,7 @@ static int __nf_tables_abort(struct net *net)
break; break;
case NFT_MSG_NEWSET: case NFT_MSG_NEWSET:
trans->ctx.table->use--; trans->ctx.table->use--;
if (nft_trans_set(trans)->bound) { if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans); nft_trans_destroy(trans);
break; break;
} }
...@@ -6918,7 +6923,7 @@ static int __nf_tables_abort(struct net *net) ...@@ -6918,7 +6923,7 @@ static int __nf_tables_abort(struct net *net)
nft_trans_destroy(trans); nft_trans_destroy(trans);
break; break;
case NFT_MSG_NEWSETELEM: case NFT_MSG_NEWSETELEM:
if (nft_trans_elem_set(trans)->bound) { if (nft_trans_elem_set_bound(trans)) {
nft_trans_destroy(trans); nft_trans_destroy(trans);
break; break;
} }
......
...@@ -72,11 +72,11 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, ...@@ -72,11 +72,11 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
{ {
struct nft_flow_offload *priv = nft_expr_priv(expr); struct nft_flow_offload *priv = nft_expr_priv(expr);
struct nf_flowtable *flowtable = &priv->flowtable->data; struct nf_flowtable *flowtable = &priv->flowtable->data;
struct tcphdr _tcph, *tcph = NULL;
enum ip_conntrack_info ctinfo; enum ip_conntrack_info ctinfo;
struct nf_flow_route route; struct nf_flow_route route;
struct flow_offload *flow; struct flow_offload *flow;
enum ip_conntrack_dir dir; enum ip_conntrack_dir dir;
bool is_tcp = false;
struct nf_conn *ct; struct nf_conn *ct;
int ret; int ret;
...@@ -89,7 +89,10 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, ...@@ -89,7 +89,10 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
case IPPROTO_TCP: case IPPROTO_TCP:
is_tcp = true; tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff,
sizeof(_tcph), &_tcph);
if (unlikely(!tcph || tcph->fin || tcph->rst))
goto out;
break; break;
case IPPROTO_UDP: case IPPROTO_UDP:
break; break;
...@@ -115,7 +118,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, ...@@ -115,7 +118,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (!flow) if (!flow)
goto err_flow_alloc; goto err_flow_alloc;
if (is_tcp) { if (tcph) {
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
} }
......
...@@ -321,4 +321,52 @@ else ...@@ -321,4 +321,52 @@ else
ip netns exec nsr1 nft list ruleset ip netns exec nsr1 nft list ruleset
fi fi
KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
SPI1=$RANDOM
SPI2=$RANDOM
if [ $SPI1 -eq $SPI2 ]; then
SPI2=$((SPI2+1))
fi
do_esp() {
local ns=$1
local me=$2
local remote=$3
local lnet=$4
local rnet=$5
local spi_out=$6
local spi_in=$7
ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
# to fwd decrypted packets after esp processing:
ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow
}
do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
ip netns exec nsr1 nft delete table ip nat
# restore default routes
ip -net ns2 route del 192.168.10.1 via 10.0.2.1
ip -net ns2 route add default via 10.0.2.1
ip -net ns2 route add default via dead:2::1
test_tcp_forwarding ns1 ns2
if [ $? -eq 0 ] ;then
echo "PASS: ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
ip netns exec nsr1 nft list ruleset 1>&2
ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
fi
exit $ret exit $ret
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment