Commit 4cdd3408 authored by Patrick McHardy, committed by Pablo Neira Ayuso

netfilter: nf_conntrack_ipv6: improve fragmentation handling

The IPv6 conntrack fragmentation currently has a couple of shortcomings.
Fragments are collected in PREROUTING/OUTPUT, are defragmented, the
defragmented packet is then passed to conntrack, the resulting conntrack
information is attached to each original fragment and the fragments then
continue their way through the stack.

Helper invocation occurs in the POSTROUTING hook, at which point only
the original fragments are available. The result of this is that
fragmented packets are never passed to helpers.

This patch improves the situation in the following way:

- If a reassembled packet belongs to a connection that has a helper
  assigned, the reassembled packet is passed through the stack instead
  of the original fragments.

- During defragmentation, the largest received fragment size is stored.
  On output, the packet is refragmented if required. If the largest
  received fragment size exceeds the outgoing MTU, a "packet too big"
  message is generated, thus behaving as if the original fragments
  were passed through the stack from an outside point of view.

- The ipv6_helper() hook function can't receive fragments anymore for
  connections using a helper, so it is switched to use ipv6_skip_exthdr()
  instead of the netfilter specific nf_ct_ipv6_skip_exthdr() and the
  reassembled packets are passed to connection tracking helpers.

The result of this is that we can properly track fragmented packets, but
still generate ICMPv6 Packet too big messages in the cases where we would
have generated them before.

This patch is also required as a precondition for IPv6 NAT, where NAT
helpers might enlarge packets up to a point that they require
fragmentation. In that case we can't generate Packet too big messages
since the proper MTU can't be calculated in all cases (e.g. when
changing textual representation of a variable number of addresses),
so the packet is transparently fragmented iff the original packet or
fragments would have fit the outgoing MTU.

IPVS parts by Jesper Dangaard Brouer <brouer@redhat.com>.
Signed-off-by: Patrick McHardy <kaber@trash.net>
parent 590e3f79
...@@ -256,6 +256,7 @@ struct inet6_skb_parm { ...@@ -256,6 +256,7 @@ struct inet6_skb_parm {
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
__u16 dsthao; __u16 dsthao;
#endif #endif
__u16 frag_max_size;
#define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_XFRM_TRANSFORMED 1
#define IP6SKB_FORWARDED 2 #define IP6SKB_FORWARDED 2
......
...@@ -493,7 +493,8 @@ int ip6_forward(struct sk_buff *skb) ...@@ -493,7 +493,8 @@ int ip6_forward(struct sk_buff *skb)
if (mtu < IPV6_MIN_MTU) if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU; mtu = IPV6_MIN_MTU;
if (skb->len > mtu && !skb_is_gso(skb)) { if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
(IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
/* Again, force OUTPUT device used as source address */ /* Again, force OUTPUT device used as source address */
skb->dev = dst->dev; skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
...@@ -636,7 +637,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ...@@ -636,7 +637,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
/* We must not fragment if the socket is set to force MTU discovery /* We must not fragment if the socket is set to force MTU discovery
* or if the skb it not generated by a local socket. * or if the skb it not generated by a local socket.
*/ */
if (unlikely(!skb->local_df && skb->len > mtu)) { if (unlikely(!skb->local_df && skb->len > mtu) ||
(IP6CB(skb)->frag_max_size &&
IP6CB(skb)->frag_max_size > mtu)) {
if (skb->sk && dst_allfrag(skb_dst(skb))) if (skb->sk && dst_allfrag(skb_dst(skb)))
sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
......
...@@ -153,10 +153,10 @@ static unsigned int ipv6_helper(unsigned int hooknum, ...@@ -153,10 +153,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
const struct nf_conn_help *help; const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper; const struct nf_conntrack_helper *helper;
enum ip_conntrack_info ctinfo; enum ip_conntrack_info ctinfo;
unsigned int ret, protoff; unsigned int ret;
unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; __be16 frag_off;
unsigned char pnum = ipv6_hdr(skb)->nexthdr; int protoff;
u8 nexthdr;
/* This is where we call the helper: as the packet goes out. */ /* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo); ct = nf_ct_get(skb, &ctinfo);
...@@ -171,9 +171,10 @@ static unsigned int ipv6_helper(unsigned int hooknum, ...@@ -171,9 +171,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
if (!helper) if (!helper)
return NF_ACCEPT; return NF_ACCEPT;
protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, nexthdr = ipv6_hdr(skb)->nexthdr;
skb->len - extoff); protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) { &frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n"); pr_debug("proto header not found\n");
return NF_ACCEPT; return NF_ACCEPT;
} }
...@@ -199,9 +200,14 @@ static unsigned int ipv6_confirm(unsigned int hooknum, ...@@ -199,9 +200,14 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
static unsigned int __ipv6_conntrack_in(struct net *net, static unsigned int __ipv6_conntrack_in(struct net *net,
unsigned int hooknum, unsigned int hooknum,
struct sk_buff *skb, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *)) int (*okfn)(struct sk_buff *))
{ {
struct sk_buff *reasm = skb->nfct_reasm; struct sk_buff *reasm = skb->nfct_reasm;
const struct nf_conn_help *help;
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
/* This packet is fragmented and has reassembled packet. */ /* This packet is fragmented and has reassembled packet. */
if (reasm) { if (reasm) {
...@@ -213,6 +219,23 @@ static unsigned int __ipv6_conntrack_in(struct net *net, ...@@ -213,6 +219,23 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
if (ret != NF_ACCEPT) if (ret != NF_ACCEPT)
return ret; return ret;
} }
/* Conntrack helpers need the entire reassembled packet in the
* POST_ROUTING hook.
*/
ct = nf_ct_get(reasm, &ctinfo);
if (ct != NULL && !nf_ct_is_untracked(ct)) {
help = nfct_help(ct);
if (help && help->helper) {
nf_conntrack_get_reasm(skb);
NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
(struct net_device *)in,
(struct net_device *)out,
okfn, NF_IP6_PRI_CONNTRACK + 1);
return NF_DROP_ERR(-ECANCELED);
}
}
nf_conntrack_get(reasm->nfct); nf_conntrack_get(reasm->nfct);
skb->nfct = reasm->nfct; skb->nfct = reasm->nfct;
skb->nfctinfo = reasm->nfctinfo; skb->nfctinfo = reasm->nfctinfo;
...@@ -228,7 +251,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, ...@@ -228,7 +251,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
const struct net_device *out, const struct net_device *out,
int (*okfn)(struct sk_buff *)) int (*okfn)(struct sk_buff *))
{ {
return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn); return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
} }
static unsigned int ipv6_conntrack_local(unsigned int hooknum, static unsigned int ipv6_conntrack_local(unsigned int hooknum,
...@@ -242,7 +265,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, ...@@ -242,7 +265,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT; return NF_ACCEPT;
} }
return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
} }
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
......
...@@ -190,6 +190,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, ...@@ -190,6 +190,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
const struct frag_hdr *fhdr, int nhoff) const struct frag_hdr *fhdr, int nhoff)
{ {
struct sk_buff *prev, *next; struct sk_buff *prev, *next;
unsigned int payload_len;
int offset, end; int offset, end;
if (fq->q.last_in & INET_FRAG_COMPLETE) { if (fq->q.last_in & INET_FRAG_COMPLETE) {
...@@ -197,8 +198,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, ...@@ -197,8 +198,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
goto err; goto err;
} }
payload_len = ntohs(ipv6_hdr(skb)->payload_len);
offset = ntohs(fhdr->frag_off) & ~0x7; offset = ntohs(fhdr->frag_off) & ~0x7;
end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - end = offset + (payload_len -
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) { if ((unsigned int)end > IPV6_MAXPLEN) {
...@@ -307,6 +310,8 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, ...@@ -307,6 +310,8 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
skb->dev = NULL; skb->dev = NULL;
fq->q.stamp = skb->tstamp; fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len; fq->q.meat += skb->len;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
atomic_add(skb->truesize, &nf_init_frags.mem); atomic_add(skb->truesize, &nf_init_frags.mem);
/* The first fragment. /* The first fragment.
...@@ -412,10 +417,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) ...@@ -412,10 +417,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
} }
atomic_sub(head->truesize, &nf_init_frags.mem); atomic_sub(head->truesize, &nf_init_frags.mem);
head->local_df = 1;
head->next = NULL; head->next = NULL;
head->dev = dev; head->dev = dev;
head->tstamp = fq->q.stamp; head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len); ipv6_hdr(head)->payload_len = htons(payload_len);
IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */ /* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE) if (head->ip_summed == CHECKSUM_COMPLETE)
...@@ -592,6 +599,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, ...@@ -592,6 +599,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
int (*okfn)(struct sk_buff *)) int (*okfn)(struct sk_buff *))
{ {
struct sk_buff *s, *s2; struct sk_buff *s, *s2;
unsigned int ret = 0;
for (s = NFCT_FRAG6_CB(skb)->orig; s;) { for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
nf_conntrack_put_reasm(s->nfct_reasm); nf_conntrack_put_reasm(s->nfct_reasm);
...@@ -601,8 +609,13 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, ...@@ -601,8 +609,13 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
s2 = s->next; s2 = s->next;
s->next = NULL; s->next = NULL;
NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn, if (ret != -ECANCELED)
ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
in, out, okfn,
NF_IP6_PRI_CONNTRACK_DEFRAG + 1); NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
else
kfree_skb(s);
s = s2; s = s2;
} }
nf_conntrack_put_reasm(skb); nf_conntrack_put_reasm(skb);
......
...@@ -88,7 +88,14 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) ...@@ -88,7 +88,14 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
static inline bool static inline bool
__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
{ {
if (skb->len > mtu && !skb_is_gso(skb)) { if (IP6CB(skb)->frag_max_size) {
/* frag_max_size tell us that, this packet have been
* defragmented by netfilter IPv6 conntrack module.
*/
if (IP6CB(skb)->frag_max_size > mtu)
return true; /* largest fragment violate MTU */
}
else if (skb->len > mtu && !skb_is_gso(skb)) {
return true; /* Packet size violate MTU size */ return true; /* Packet size violate MTU size */
} }
return false; return false;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment