Commit 526160d9, authored by Patrick McHardy, committed by David S. Miller

[IPV4]: Keep fragment queues private to each user.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 5365e5da
...@@ -262,10 +262,9 @@ extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack); ...@@ -262,10 +262,9 @@ extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
/* Fake conntrack entry for untracked connections */ /* Fake conntrack entry for untracked connections */
extern struct ip_conntrack ip_conntrack_untracked; extern struct ip_conntrack ip_conntrack_untracked;
extern int ip_ct_no_defrag;
/* Returns new sk_buff, or NULL */ /* Returns new sk_buff, or NULL */
struct sk_buff * struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb); ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user);
/* Iterate over all conntracks: if iter returns true, it's deleted. */ /* Iterate over all conntracks: if iter returns true, it's deleted. */
extern void extern void
......
...@@ -287,8 +287,19 @@ extern int ip_call_ra_chain(struct sk_buff *skb); ...@@ -287,8 +287,19 @@ extern int ip_call_ra_chain(struct sk_buff *skb);
* Functions provided by ip_fragment.o * Functions provided by ip_fragment.o
*/ */
struct sk_buff *ip_defrag(struct sk_buff *skb); enum ip_defrag_users
extern void ipfrag_flush(void); {
IP_DEFRAG_LOCAL_DELIVER,
IP_DEFRAG_CALL_RA_CHAIN,
IP_DEFRAG_CONNTRACK_IN,
IP_DEFRAG_CONNTRACK_OUT,
IP_DEFRAG_NAT_OUT,
IP_DEFRAG_VS_IN,
IP_DEFRAG_VS_OUT,
IP_DEFRAG_VS_FWD
};
struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user);
extern int ip_frag_nqueues; extern int ip_frag_nqueues;
extern atomic_t ip_frag_mem; extern atomic_t ip_frag_mem;
......
...@@ -73,6 +73,7 @@ struct ipfrag_skb_cb ...@@ -73,6 +73,7 @@ struct ipfrag_skb_cb
struct ipq { struct ipq {
struct ipq *next; /* linked list pointers */ struct ipq *next; /* linked list pointers */
struct list_head lru_list; /* lru list member */ struct list_head lru_list; /* lru list member */
u32 user;
u32 saddr; u32 saddr;
u32 daddr; u32 daddr;
u16 id; u16 id;
...@@ -243,13 +244,13 @@ static void ipq_kill(struct ipq *ipq) ...@@ -243,13 +244,13 @@ static void ipq_kill(struct ipq *ipq)
/* Memory limiting on fragments. Evictor trashes the oldest /* Memory limiting on fragments. Evictor trashes the oldest
* fragment queue until we are back under the threshold. * fragment queue until we are back under the threshold.
*/ */
static void __ip_evictor(int threshold) static void ip_evictor(void)
{ {
struct ipq *qp; struct ipq *qp;
struct list_head *tmp; struct list_head *tmp;
int work; int work;
work = atomic_read(&ip_frag_mem) - threshold; work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
if (work <= 0) if (work <= 0)
return; return;
...@@ -274,11 +275,6 @@ static void __ip_evictor(int threshold) ...@@ -274,11 +275,6 @@ static void __ip_evictor(int threshold)
} }
} }
static inline void ip_evictor(void)
{
__ip_evictor(sysctl_ipfrag_low_thresh);
}
/* /*
* Oops, a fragment queue timed out. Kill it and send an ICMP reply. * Oops, a fragment queue timed out. Kill it and send an ICMP reply.
*/ */
...@@ -325,7 +321,8 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in) ...@@ -325,7 +321,8 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in)
if(qp->id == qp_in->id && if(qp->id == qp_in->id &&
qp->saddr == qp_in->saddr && qp->saddr == qp_in->saddr &&
qp->daddr == qp_in->daddr && qp->daddr == qp_in->daddr &&
qp->protocol == qp_in->protocol) { qp->protocol == qp_in->protocol &&
qp->user == qp_in->user) {
atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt);
write_unlock(&ipfrag_lock); write_unlock(&ipfrag_lock);
qp_in->last_in |= COMPLETE; qp_in->last_in |= COMPLETE;
...@@ -352,7 +349,7 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in) ...@@ -352,7 +349,7 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in)
} }
/* Add an entry to the 'ipq' queue for a newly received IP datagram. */ /* Add an entry to the 'ipq' queue for a newly received IP datagram. */
static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph) static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
{ {
struct ipq *qp; struct ipq *qp;
...@@ -364,6 +361,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph) ...@@ -364,6 +361,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph)
qp->id = iph->id; qp->id = iph->id;
qp->saddr = iph->saddr; qp->saddr = iph->saddr;
qp->daddr = iph->daddr; qp->daddr = iph->daddr;
qp->user = user;
qp->len = 0; qp->len = 0;
qp->meat = 0; qp->meat = 0;
qp->fragments = NULL; qp->fragments = NULL;
...@@ -386,7 +384,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph) ...@@ -386,7 +384,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph)
/* Find the correct entry in the "incomplete datagrams" queue for /* Find the correct entry in the "incomplete datagrams" queue for
* this IP datagram, and create new one, if nothing is found. * this IP datagram, and create new one, if nothing is found.
*/ */
static inline struct ipq *ip_find(struct iphdr *iph) static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
{ {
__u16 id = iph->id; __u16 id = iph->id;
__u32 saddr = iph->saddr; __u32 saddr = iph->saddr;
...@@ -400,7 +398,8 @@ static inline struct ipq *ip_find(struct iphdr *iph) ...@@ -400,7 +398,8 @@ static inline struct ipq *ip_find(struct iphdr *iph)
if(qp->id == id && if(qp->id == id &&
qp->saddr == saddr && qp->saddr == saddr &&
qp->daddr == daddr && qp->daddr == daddr &&
qp->protocol == protocol) { qp->protocol == protocol &&
qp->user == user) {
atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt);
read_unlock(&ipfrag_lock); read_unlock(&ipfrag_lock);
return qp; return qp;
...@@ -408,7 +407,7 @@ static inline struct ipq *ip_find(struct iphdr *iph) ...@@ -408,7 +407,7 @@ static inline struct ipq *ip_find(struct iphdr *iph)
} }
read_unlock(&ipfrag_lock); read_unlock(&ipfrag_lock);
return ip_frag_create(hash, iph); return ip_frag_create(hash, iph, user);
} }
/* Add new segment to existing queue. */ /* Add new segment to existing queue. */
...@@ -642,7 +641,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) ...@@ -642,7 +641,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
} }
/* Process an incoming IP datagram fragment. */ /* Process an incoming IP datagram fragment. */
struct sk_buff *ip_defrag(struct sk_buff *skb) struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
{ {
struct iphdr *iph = skb->nh.iph; struct iphdr *iph = skb->nh.iph;
struct ipq *qp; struct ipq *qp;
...@@ -657,7 +656,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) ...@@ -657,7 +656,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
dev = skb->dev; dev = skb->dev;
/* Lookup (or create) queue header */ /* Lookup (or create) queue header */
if ((qp = ip_find(iph)) != NULL) { if ((qp = ip_find(iph, user)) != NULL) {
struct sk_buff *ret = NULL; struct sk_buff *ret = NULL;
spin_lock(&qp->lock); spin_lock(&qp->lock);
...@@ -689,10 +688,4 @@ void ipfrag_init(void) ...@@ -689,10 +688,4 @@ void ipfrag_init(void)
add_timer(&ipfrag_secret_timer); add_timer(&ipfrag_secret_timer);
} }
void ipfrag_flush(void)
{
__ip_evictor(0);
}
EXPORT_SYMBOL(ip_defrag); EXPORT_SYMBOL(ip_defrag);
EXPORT_SYMBOL(ipfrag_flush);
...@@ -172,7 +172,7 @@ int ip_call_ra_chain(struct sk_buff *skb) ...@@ -172,7 +172,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
(!sk->sk_bound_dev_if || (!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) { sk->sk_bound_dev_if == skb->dev->ifindex)) {
if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
skb = ip_defrag(skb); skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
if (skb == NULL) { if (skb == NULL) {
read_unlock(&ip_ra_lock); read_unlock(&ip_ra_lock);
return 1; return 1;
...@@ -273,7 +273,7 @@ int ip_local_deliver(struct sk_buff *skb) ...@@ -273,7 +273,7 @@ int ip_local_deliver(struct sk_buff *skb)
*/ */
if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
skb = ip_defrag(skb); skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
if (!skb) if (!skb)
return 0; return 0;
} }
......
...@@ -544,9 +544,9 @@ u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) ...@@ -544,9 +544,9 @@ u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
} }
static inline struct sk_buff * static inline struct sk_buff *
ip_vs_gather_frags(struct sk_buff *skb) ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{ {
skb = ip_defrag(skb); skb = ip_defrag(skb, user);
if (skb) if (skb)
ip_send_check(skb->nh.iph); ip_send_check(skb->nh.iph);
return skb; return skb;
...@@ -620,7 +620,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) ...@@ -620,7 +620,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
/* reassemble IP fragments */ /* reassemble IP fragments */
if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
skb = ip_vs_gather_frags(skb); skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
if (!skb) if (!skb)
return NF_STOLEN; return NF_STOLEN;
*pskb = skb; *pskb = skb;
...@@ -759,7 +759,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, ...@@ -759,7 +759,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
/* reassemble IP fragments */ /* reassemble IP fragments */
if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) && if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
!pp->dont_defrag)) { !pp->dont_defrag)) {
skb = ip_vs_gather_frags(skb); skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
if (!skb) if (!skb)
return NF_STOLEN; return NF_STOLEN;
iph = skb->nh.iph; iph = skb->nh.iph;
...@@ -839,7 +839,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, ...@@ -839,7 +839,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
* forward to the right destination host if relevant. * forward to the right destination host if relevant.
* Currently handles error types - unreachable, quench, ttl exceeded. * Currently handles error types - unreachable, quench, ttl exceeded.
*/ */
static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) static int
ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
{ {
struct sk_buff *skb = *pskb; struct sk_buff *skb = *pskb;
struct iphdr *iph; struct iphdr *iph;
...@@ -853,7 +854,9 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) ...@@ -853,7 +854,9 @@ static int ip_vs_in_icmp(struct sk_buff **pskb, int *related)
/* reassemble IP fragments */ /* reassemble IP fragments */
if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
skb = ip_vs_gather_frags(skb); skb = ip_vs_gather_frags(skb,
hooknum == NF_IP_LOCAL_IN ?
IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
if (!skb) if (!skb)
return NF_STOLEN; return NF_STOLEN;
*pskb = skb; *pskb = skb;
...@@ -962,7 +965,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, ...@@ -962,7 +965,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
iph = skb->nh.iph; iph = skb->nh.iph;
if (unlikely(iph->protocol == IPPROTO_ICMP)) { if (unlikely(iph->protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_in_icmp(pskb, &related); int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
if (related) if (related)
return verdict; return verdict;
...@@ -1057,7 +1060,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb, ...@@ -1057,7 +1060,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP) if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
return NF_ACCEPT; return NF_ACCEPT;
return ip_vs_in_icmp(pskb, &r); return ip_vs_in_icmp(pskb, &r, hooknum);
} }
......
...@@ -936,29 +936,22 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, ...@@ -936,29 +936,22 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
} }
} }
int ip_ct_no_defrag;
/* Returns new sk_buff, or NULL */ /* Returns new sk_buff, or NULL */
struct sk_buff * struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb) ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
{ {
struct sock *sk = skb->sk; struct sock *sk = skb->sk;
#ifdef CONFIG_NETFILTER_DEBUG #ifdef CONFIG_NETFILTER_DEBUG
unsigned int olddebug = skb->nf_debug; unsigned int olddebug = skb->nf_debug;
#endif #endif
if (unlikely(ip_ct_no_defrag)) {
kfree_skb(skb);
return NULL;
}
if (sk) { if (sk) {
sock_hold(sk); sock_hold(sk);
skb_orphan(skb); skb_orphan(skb);
} }
local_bh_disable(); local_bh_disable();
skb = ip_defrag(skb); skb = ip_defrag(skb, user);
local_bh_enable(); local_bh_enable();
if (!skb) { if (!skb) {
......
...@@ -391,7 +391,10 @@ static unsigned int ip_conntrack_defrag(unsigned int hooknum, ...@@ -391,7 +391,10 @@ static unsigned int ip_conntrack_defrag(unsigned int hooknum,
/* Gather fragments. */ /* Gather fragments. */
if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
*pskb = ip_ct_gather_frags(*pskb); *pskb = ip_ct_gather_frags(*pskb,
hooknum == NF_IP_PRE_ROUTING ?
IP_DEFRAG_CONNTRACK_IN :
IP_DEFRAG_CONNTRACK_OUT);
if (!*pskb) if (!*pskb)
return NF_STOLEN; return NF_STOLEN;
} }
...@@ -823,12 +826,6 @@ static int init_or_cleanup(int init) ...@@ -823,12 +826,6 @@ static int init_or_cleanup(int init)
cleanup_defraglocalops: cleanup_defraglocalops:
nf_unregister_hook(&ip_conntrack_defrag_local_out_ops); nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
cleanup_defragops: cleanup_defragops:
/* Frag queues may hold fragments with skb->dst == NULL */
ip_ct_no_defrag = 1;
synchronize_net();
local_bh_disable();
ipfrag_flush();
local_bh_enable();
nf_unregister_hook(&ip_conntrack_defrag_ops); nf_unregister_hook(&ip_conntrack_defrag_ops);
cleanup_proc_stat: cleanup_proc_stat:
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
......
...@@ -195,7 +195,7 @@ ip_nat_out(unsigned int hooknum, ...@@ -195,7 +195,7 @@ ip_nat_out(unsigned int hooknum,
I'm starting to have nightmares about fragments. */ I'm starting to have nightmares about fragments. */
if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
*pskb = ip_ct_gather_frags(*pskb); *pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT);
if (!*pskb) if (!*pskb)
return NF_STOLEN; return NF_STOLEN;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment