Commit 70d0a61c, authored by Rusty Russell, committed by David S. Miller

[NETFILTER]: Make NAT code handle non-linear skbs.

Makes the NAT code and all NAT helpers handle non-linear skbs.
Main trick is to introduce skb_ip_make_writable which handles all
the decloning, linearizing, etc.
parent 09f2947d
...@@ -16,7 +16,7 @@ extern unsigned int do_bindings(struct ip_conntrack *ct, ...@@ -16,7 +16,7 @@ extern unsigned int do_bindings(struct ip_conntrack *ct,
extern struct list_head protos; extern struct list_head protos;
extern unsigned int icmp_reply_translation(struct sk_buff *skb, extern int icmp_reply_translation(struct sk_buff **pskb,
struct ip_conntrack *conntrack, struct ip_conntrack *conntrack,
unsigned int hooknum, unsigned int hooknum,
int dir); int dir);
...@@ -30,4 +30,10 @@ extern void place_in_hashes(struct ip_conntrack *conntrack, ...@@ -30,4 +30,10 @@ extern void place_in_hashes(struct ip_conntrack *conntrack,
extern struct ip_nat_protocol ip_nat_protocol_tcp; extern struct ip_nat_protocol ip_nat_protocol_tcp;
extern struct ip_nat_protocol ip_nat_protocol_udp; extern struct ip_nat_protocol ip_nat_protocol_udp;
extern struct ip_nat_protocol ip_nat_protocol_icmp; extern struct ip_nat_protocol ip_nat_protocol_icmp;
/* Call this before modifying an existing IP packet: ensures it is
modifiable and linear to the point you care about (writable_len).
Returns true or false. */
extern int skb_ip_make_writable(struct sk_buff **pskb,
unsigned int writable_len);
#endif /* _IP_NAT_CORE_H */ #endif /* _IP_NAT_CORE_H */
...@@ -43,22 +43,23 @@ extern struct list_head helpers; ...@@ -43,22 +43,23 @@ extern struct list_head helpers;
extern int ip_nat_helper_register(struct ip_nat_helper *me); extern int ip_nat_helper_register(struct ip_nat_helper *me);
extern void ip_nat_helper_unregister(struct ip_nat_helper *me); extern void ip_nat_helper_unregister(struct ip_nat_helper *me);
/* These return true or false. */
extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb, extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb,
struct ip_conntrack *ct, struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo, enum ip_conntrack_info ctinfo,
unsigned int match_offset, unsigned int match_offset,
unsigned int match_len, unsigned int match_len,
char *rep_buffer, const char *rep_buffer,
unsigned int rep_len); unsigned int rep_len);
extern int ip_nat_mangle_udp_packet(struct sk_buff **skb, extern int ip_nat_mangle_udp_packet(struct sk_buff **skb,
struct ip_conntrack *ct, struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo, enum ip_conntrack_info ctinfo,
unsigned int match_offset, unsigned int match_offset,
unsigned int match_len, unsigned int match_len,
char *rep_buffer, const char *rep_buffer,
unsigned int rep_len); unsigned int rep_len);
extern int ip_nat_seq_adjust(struct sk_buff *skb, extern int ip_nat_seq_adjust(struct sk_buff **pskb,
struct ip_conntrack *ct, struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo); enum ip_conntrack_info ctinfo);
extern void ip_nat_delete_sack(struct sk_buff *skb);
#endif #endif
...@@ -18,8 +18,9 @@ struct ip_nat_protocol ...@@ -18,8 +18,9 @@ struct ip_nat_protocol
unsigned int protonum; unsigned int protonum;
/* Do a packet translation according to the ip_nat_proto_manip /* Do a packet translation according to the ip_nat_proto_manip
* and manip type. */ * and manip type. Return true if succeeded. */
void (*manip_pkt)(struct iphdr *iph, size_t len, int (*manip_pkt)(struct sk_buff **pskb,
unsigned int hdroff,
const struct ip_conntrack_manip *manip, const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype); enum ip_nat_manip_type maniptype);
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
#include <net/icmp.h> #include <net/icmp.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/tcp.h> /* For tcp_prot in getorigdst */ #include <net/tcp.h> /* For tcp_prot in getorigdst */
#include <linux/icmp.h>
#include <linux/udp.h>
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
...@@ -698,14 +700,26 @@ void place_in_hashes(struct ip_conntrack *conntrack, ...@@ -698,14 +700,26 @@ void place_in_hashes(struct ip_conntrack *conntrack,
list_prepend(&byipsproto[ipsprotohash], &info->byipsproto); list_prepend(&byipsproto[ipsprotohash], &info->byipsproto);
} }
static void /* Returns true if succeeded. */
manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len, static int
manip_pkt(u_int16_t proto,
struct sk_buff **pskb,
unsigned int iphdroff,
const struct ip_conntrack_manip *manip, const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype, enum ip_nat_manip_type maniptype)
__u32 *nfcache)
{ {
*nfcache |= NFC_ALTERED; struct iphdr *iph;
find_nat_proto(proto)->manip_pkt(iph, len, manip, maniptype);
(*pskb)->nfcache |= NFC_ALTERED;
if (!skb_ip_make_writable(pskb, iphdroff+sizeof(iph)))
return 0;
iph = (void *)(*pskb)->data + iphdroff;
/* Manipulate protocol part. */
if (!find_nat_proto(proto)->manip_pkt(pskb, iphdroff + iph->ihl*4,
manip, maniptype))
return 0;
if (maniptype == IP_NAT_MANIP_SRC) { if (maniptype == IP_NAT_MANIP_SRC) {
iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip, iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip,
...@@ -716,17 +730,7 @@ manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len, ...@@ -716,17 +730,7 @@ manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len,
iph->check); iph->check);
iph->daddr = manip->ip; iph->daddr = manip->ip;
} }
#if 0 return 1;
if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
DEBUGP("IP: checksum on packet bad.\n");
if (proto == IPPROTO_TCP) {
void *th = (u_int32_t *)iph + iph->ihl;
if (tcp_v4_check(th, len - 4*iph->ihl, iph->saddr, iph->daddr,
csum_partial((char *)th, len-4*iph->ihl, 0)))
DEBUGP("TCP: checksum on packet bad\n");
}
#endif
} }
static inline int exp_for_packet(struct ip_conntrack_expect *exp, static inline int exp_for_packet(struct ip_conntrack_expect *exp,
...@@ -754,25 +758,13 @@ do_bindings(struct ip_conntrack *ct, ...@@ -754,25 +758,13 @@ do_bindings(struct ip_conntrack *ct,
unsigned int i; unsigned int i;
struct ip_nat_helper *helper; struct ip_nat_helper *helper;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
int is_tcp = (*pskb)->nh.iph->protocol == IPPROTO_TCP; int proto = (*pskb)->nh.iph->protocol;
/* Need nat lock to protect against modification, but neither /* Need nat lock to protect against modification, but neither
conntrack (referenced) and helper (deleted with conntrack (referenced) and helper (deleted with
synchronize_bh()) can vanish. */ synchronize_bh()) can vanish. */
READ_LOCK(&ip_nat_lock); READ_LOCK(&ip_nat_lock);
for (i = 0; i < info->num_manips; i++) { for (i = 0; i < info->num_manips; i++) {
/* raw socket (tcpdump) may have clone of incoming
skb: don't disturb it --RR */
if (skb_cloned(*pskb) && !(*pskb)->sk) {
struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
if (!nskb) {
READ_UNLOCK(&ip_nat_lock);
return NF_DROP;
}
kfree_skb(*pskb);
*pskb = nskb;
}
if (info->manips[i].direction == dir if (info->manips[i].direction == dir
&& info->manips[i].hooknum == hooknum) { && info->manips[i].hooknum == hooknum) {
DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n", DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n",
...@@ -781,12 +773,12 @@ do_bindings(struct ip_conntrack *ct, ...@@ -781,12 +773,12 @@ do_bindings(struct ip_conntrack *ct,
? "SRC" : "DST", ? "SRC" : "DST",
NIPQUAD(info->manips[i].manip.ip), NIPQUAD(info->manips[i].manip.ip),
htons(info->manips[i].manip.u.all)); htons(info->manips[i].manip.u.all));
manip_pkt((*pskb)->nh.iph->protocol, if (manip_pkt(proto, pskb, 0,
(*pskb)->nh.iph,
(*pskb)->len,
&info->manips[i].manip, &info->manips[i].manip,
info->manips[i].maniptype, info->manips[i].maniptype) < 0) {
&(*pskb)->nfcache); READ_UNLOCK(&ip_nat_lock);
return NF_DROP;
}
} }
} }
helper = info->helper; helper = info->helper;
...@@ -839,12 +831,14 @@ do_bindings(struct ip_conntrack *ct, ...@@ -839,12 +831,14 @@ do_bindings(struct ip_conntrack *ct,
/* Adjust sequence number only once per packet /* Adjust sequence number only once per packet
* (helper is called at all hooks) */ * (helper is called at all hooks) */
if (is_tcp && (hooknum == NF_IP_POST_ROUTING if (proto == IPPROTO_TCP
&& (hooknum == NF_IP_POST_ROUTING
|| hooknum == NF_IP_LOCAL_IN)) { || hooknum == NF_IP_LOCAL_IN)) {
DEBUGP("ip_nat_core: adjusting sequence number\n"); DEBUGP("ip_nat_core: adjusting sequence number\n");
/* future: put this in a l4-proto specific function, /* future: put this in a l4-proto specific function,
* and call this function here. */ * and call this function here. */
ip_nat_seq_adjust(*pskb, ct, ctinfo); if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
ret = NF_DROP;
} }
return ret; return ret;
...@@ -855,39 +849,51 @@ do_bindings(struct ip_conntrack *ct, ...@@ -855,39 +849,51 @@ do_bindings(struct ip_conntrack *ct,
/* not reached */ /* not reached */
} }
unsigned int int
icmp_reply_translation(struct sk_buff *skb, icmp_reply_translation(struct sk_buff **pskb,
struct ip_conntrack *conntrack, struct ip_conntrack *conntrack,
unsigned int hooknum, unsigned int hooknum,
int dir) int dir)
{ {
struct iphdr *iph = skb->nh.iph; struct {
struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); struct icmphdr icmp;
struct iphdr *inner = (struct iphdr *)(hdr + 1); struct iphdr ip;
size_t datalen = skb->len - ((void *)inner - (void *)iph); } *inside;
unsigned int i; unsigned int i;
struct ip_nat_info *info = &conntrack->nat.info; struct ip_nat_info *info = &conntrack->nat.info;
IP_NF_ASSERT(skb->len >= iph->ihl*4 + sizeof(struct icmphdr)); if (!skb_ip_make_writable(pskb,(*pskb)->nh.iph->ihl*4+sizeof(*inside)))
return 0;
inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
/* We're actually going to mangle it beyond trivial checksum
adjustment, so make sure the current checksum is correct. */
if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY
&& (u16)csum_fold(skb_checksum(*pskb, (*pskb)->nh.iph->ihl*4,
(*pskb)->len, 0)))
return 0;
/* Must be RELATED */ /* Must be RELATED */
IP_NF_ASSERT(skb->nfct - (struct ip_conntrack *)skb->nfct->master IP_NF_ASSERT((*pskb)->nfct
- (struct ip_conntrack *)(*pskb)->nfct->master
== IP_CT_RELATED == IP_CT_RELATED
|| skb->nfct - (struct ip_conntrack *)skb->nfct->master || (*pskb)->nfct
- (struct ip_conntrack *)(*pskb)->nfct->master
== IP_CT_RELATED+IP_CT_IS_REPLY); == IP_CT_RELATED+IP_CT_IS_REPLY);
/* Redirects on non-null nats must be dropped, else they'll /* Redirects on non-null nats must be dropped, else they'll
start talking to each other without our translation, and be start talking to each other without our translation, and be
confused... --RR */ confused... --RR */
if (hdr->type == ICMP_REDIRECT) { if (inside->icmp.type == ICMP_REDIRECT) {
/* Don't care about races here. */ /* Don't care about races here. */
if (info->initialized if (info->initialized
!= ((1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST)) != ((1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST))
|| info->num_manips != 0) || info->num_manips != 0)
return NF_DROP; return 0;
} }
DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n", DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n",
skb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); *pskb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
/* Note: May not be from a NAT'd host, but probably safest to /* Note: May not be from a NAT'd host, but probably safest to
do translation always as if it came from the host itself do translation always as if it came from the host itself
(even though a "host unreachable" coming from the host (even though a "host unreachable" coming from the host
...@@ -918,11 +924,13 @@ icmp_reply_translation(struct sk_buff *skb, ...@@ -918,11 +924,13 @@ icmp_reply_translation(struct sk_buff *skb,
? "DST" : "SRC", ? "DST" : "SRC",
NIPQUAD(info->manips[i].manip.ip), NIPQUAD(info->manips[i].manip.ip),
ntohs(info->manips[i].manip.u.udp.port)); ntohs(info->manips[i].manip.u.udp.port));
manip_pkt(inner->protocol, inner, if (manip_pkt(inside->ip.protocol, pskb,
skb->len - ((void *)inner - (void *)iph), (*pskb)->nh.iph->ihl*4
+ sizeof(inside->icmp),
&info->manips[i].manip, &info->manips[i].manip,
!info->manips[i].maniptype, !info->manips[i].maniptype) < 0)
&skb->nfcache); goto unlock_fail;
/* Outer packet needs to have IP header NATed like /* Outer packet needs to have IP header NATed like
it's a reply. */ it's a reply. */
...@@ -932,22 +940,82 @@ icmp_reply_translation(struct sk_buff *skb, ...@@ -932,22 +940,82 @@ icmp_reply_translation(struct sk_buff *skb,
info->manips[i].maniptype == IP_NAT_MANIP_SRC info->manips[i].maniptype == IP_NAT_MANIP_SRC
? "SRC" : "DST", ? "SRC" : "DST",
NIPQUAD(info->manips[i].manip.ip)); NIPQUAD(info->manips[i].manip.ip));
manip_pkt(0, iph, skb->len, if (manip_pkt(0, pskb, 0,
&info->manips[i].manip, &info->manips[i].manip,
info->manips[i].maniptype, info->manips[i].maniptype) < 0)
&skb->nfcache); goto unlock_fail;
} }
} }
READ_UNLOCK(&ip_nat_lock); READ_UNLOCK(&ip_nat_lock);
/* Since we mangled inside ICMP packet, recalculate its inside->icmp.checksum = 0;
checksum from scratch. (Hence the handling of incorrect inside->icmp.checksum = csum_fold(skb_checksum(*pskb,
checksums in conntrack, so we don't accidentally fix one.) */ (*pskb)->nh.iph->ihl*4,
hdr->checksum = 0; (*pskb)->len, 0));
hdr->checksum = ip_compute_csum((unsigned char *)hdr, return 1;
sizeof(*hdr) + datalen);
return NF_ACCEPT; unlock_fail:
READ_UNLOCK(&ip_nat_lock);
return 0;
}
/*
 * Make the first writable_len bytes of the IP packet in *pskb safe to
 * modify in place.
 *
 * Returns 1 on success and 0 on failure (writable_len is longer than the
 * packet, the copy could not be allocated, or the pull failed).
 *
 * On success *pskb may point at a freshly copied skb (the old one has been
 * released with kfree_skb()), and even when the skb itself is kept its data
 * may have been pulled.  Callers must therefore re-read any pointer into
 * the packet after calling this.
 */
int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
{
	struct sk_buff *nskb;

	if (writable_len > (*pskb)->len)
		return 0;

	/* Not exclusive use of packet?  Must copy. */
	if (skb_shared(*pskb) || skb_cloned(*pskb))
		goto copy_skb;

	/* Alexey says IP hdr is always modifiable and linear, so ok. */
	if (writable_len <= (*pskb)->nh.iph->ihl*4)
		return 1;

	/* DaveM says protocol headers are also modifiable. */
	switch ((*pskb)->nh.iph->protocol) {
	case IPPROTO_TCP: {
		struct tcphdr hdr;

		/* Header may not be linear yet, so read it via a copy. */
		if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
				  &hdr, sizeof(hdr)) != 0)
			goto copy_skb;
		if (writable_len <= (*pskb)->nh.iph->ihl*4 + hdr.doff*4)
			goto pull_skb;
		goto copy_skb;
	}
	case IPPROTO_UDP:
		if (writable_len <= (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr))
			goto pull_skb;
		goto copy_skb;
	case IPPROTO_ICMP:
		if (writable_len
		    <= (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr))
			goto pull_skb;
		goto copy_skb;
	/* Insert other cases here as desired */
	default:
		/* Unknown protocol: nothing is known to be writable, copy. */
		break;
	}

copy_skb:
	nskb = skb_copy(*pskb, GFP_ATOMIC);
	if (!nskb)
		return 0;
	BUG_ON(skb_is_nonlinear(nskb));

	/* Rest of kernel will get very unhappy if we pass it a
	   suddenly-orphaned skbuff */
	if ((*pskb)->sk)
		skb_set_owner_w(nskb, (*pskb)->sk);
	kfree_skb(*pskb);
	*pskb = nskb;
	return 1;

pull_skb:
	/* Only the protocol header is needed: linearize up to writable_len. */
	return pskb_may_pull(*pskb, writable_len);
}
int __init ip_nat_init(void) int __init ip_nat_init(void)
......
...@@ -47,13 +47,13 @@ ...@@ -47,13 +47,13 @@
DECLARE_LOCK(ip_nat_seqofs_lock); DECLARE_LOCK(ip_nat_seqofs_lock);
static inline int /* Setup TCP sequence correction given this change at this sequence */
ip_nat_resize_packet(struct sk_buff **skb, static inline void
adjust_tcp_sequence(u32 seq,
int sizediff,
struct ip_conntrack *ct, struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo, enum ip_conntrack_info ctinfo)
int new_size)
{ {
struct iphdr *iph;
int dir; int dir;
struct ip_nat_seq *this_way, *other_way; struct ip_nat_seq *this_way, *other_way;
...@@ -65,25 +65,6 @@ ip_nat_resize_packet(struct sk_buff **skb, ...@@ -65,25 +65,6 @@ ip_nat_resize_packet(struct sk_buff **skb,
this_way = &ct->nat.info.seq[dir]; this_way = &ct->nat.info.seq[dir];
other_way = &ct->nat.info.seq[!dir]; other_way = &ct->nat.info.seq[!dir];
if (new_size > (*skb)->len + skb_tailroom(*skb)) {
struct sk_buff *newskb;
newskb = skb_copy_expand(*skb, skb_headroom(*skb),
new_size - (*skb)->len,
GFP_ATOMIC);
if (!newskb) {
printk("ip_nat_resize_packet: oom\n");
return 0;
} else {
kfree_skb(*skb);
*skb = newskb;
}
}
iph = (*skb)->nh.iph;
if (iph->protocol == IPPROTO_TCP) {
struct tcphdr *tcph = (void *)iph + iph->ihl*4;
DEBUGP("ip_nat_resize_packet: Seq_offset before: "); DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
DUMP_OFFSET(this_way); DUMP_OFFSET(this_way);
...@@ -94,23 +75,79 @@ ip_nat_resize_packet(struct sk_buff **skb, ...@@ -94,23 +75,79 @@ ip_nat_resize_packet(struct sk_buff **skb,
* adjustment in the window, but do deal with common case of a * adjustment in the window, but do deal with common case of a
* retransmit */ * retransmit */
if (this_way->offset_before == this_way->offset_after if (this_way->offset_before == this_way->offset_after
|| before(this_way->correction_pos, ntohl(tcph->seq))) { || before(this_way->correction_pos, seq)) {
this_way->correction_pos = ntohl(tcph->seq); this_way->correction_pos = seq;
this_way->offset_before = this_way->offset_after; this_way->offset_before = this_way->offset_after;
this_way->offset_after = (int32_t) this_way->offset_after += sizediff;
this_way->offset_before + new_size -
(*skb)->len;
} }
UNLOCK_BH(&ip_nat_seqofs_lock); UNLOCK_BH(&ip_nat_seqofs_lock);
DEBUGP("ip_nat_resize_packet: Seq_offset after: "); DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
DUMP_OFFSET(this_way); DUMP_OFFSET(this_way);
}
/*
 * Replace match_len bytes at match_offset (relative to dataoff bytes past
 * the IP header) with rep_len bytes from rep_buffer, then fix up the skb
 * length, the IP total length and header checksum, and skb->csum.
 * The skb must be linear.
 */
static void mangle_contents(struct sk_buff *skb,
			    unsigned int dataoff,
			    unsigned int match_offset,
			    unsigned int match_len,
			    const char *rep_buffer,
			    unsigned int rep_len)
{
	unsigned char *payload;
	unsigned char *hole;
	unsigned char *rest;

	BUG_ON(skb_is_nonlinear(skb));

	payload = (unsigned char *)skb->nh.iph + dataoff;
	hole = payload + match_offset;
	rest = hole + match_len;

	/* Slide everything after the matched region to its new position. */
	memmove(hole + rep_len, rest, skb->tail - rest);

	/* Drop the replacement into the gap. */
	memcpy(hole, rep_buffer, rep_len);

	/* Grow or shrink the skb to the new length. */
	if (rep_len > match_len) {
		DEBUGP("ip_nat_mangle_packet: Extending packet by "
			"%u from %u bytes\n", rep_len - match_len,
		       skb->len);
		skb_put(skb, rep_len - match_len);
	} else {
		DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
			"%u from %u bytes\n", match_len - rep_len,
		       skb->len);
		__skb_trim(skb, skb->len + rep_len - match_len);
	}

	/* IP header: new total length, fresh header checksum. */
	skb->nh.iph->tot_len = htons(skb->len);
	ip_send_check(skb->nh.iph);

	/* Checksum of everything from dataoff to the end of the packet. */
	skb->csum = csum_partial(payload, skb->len - dataoff, 0);
}
/*
 * Unusual, but possible case: swap *pskb for a copy made by
 * skb_copy_expand() with `extra` requested as tailroom.
 * Returns 1 on success, 0 if the packet would exceed 65535 bytes or the
 * copy cannot be allocated (in which case *pskb is left untouched).
 */
static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
{
	struct sk_buff *old = *pskb;
	struct sk_buff *bigger;

	if (old->len + extra > 65535)
		return 0;

	bigger = skb_copy_expand(old, skb_headroom(old), extra, GFP_ATOMIC);
	if (!bigger)
		return 0;

	/* Keep the owning socket attached to the replacement. */
	if (old->sk)
		skb_set_owner_w(bigger, old->sk);
#ifdef CONFIG_NETFILTER_DEBUG
	bigger->nf_debug = old->nf_debug;
#endif
	kfree_skb(old);
	*pskb = bigger;
	return 1;
}
/* Generic function for mangling variable-length address changes inside /* Generic function for mangling variable-length address changes inside
* NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
...@@ -121,91 +158,41 @@ ip_nat_resize_packet(struct sk_buff **skb, ...@@ -121,91 +158,41 @@ ip_nat_resize_packet(struct sk_buff **skb,
* *
* */ * */
int int
ip_nat_mangle_tcp_packet(struct sk_buff **skb, ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
struct ip_conntrack *ct, struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo, enum ip_conntrack_info ctinfo,
unsigned int match_offset, unsigned int match_offset,
unsigned int match_len, unsigned int match_len,
char *rep_buffer, const char *rep_buffer,
unsigned int rep_len) unsigned int rep_len)
{ {
struct iphdr *iph = (*skb)->nh.iph; struct iphdr *iph;
struct tcphdr *tcph; struct tcphdr *tcph;
unsigned char *data;
u_int32_t tcplen, newlen, newtcplen;
tcplen = (*skb)->len - iph->ihl*4; if (!skb_ip_make_writable(pskb, (*pskb)->len))
newtcplen = tcplen - match_len + rep_len;
newlen = iph->ihl*4 + newtcplen;
if (newlen > 65535) {
if (net_ratelimit())
printk("ip_nat_mangle_tcp_packet: nat'ed packet "
"exceeds maximum packet size\n");
return 0; return 0;
}
if ((*skb)->len != newlen) { if (rep_len > match_len
if (!ip_nat_resize_packet(skb, ct, ctinfo, newlen)) { && rep_len - match_len > skb_tailroom(*pskb)
printk("resize_packet failed!!\n"); && !enlarge_skb(pskb, rep_len - match_len))
return 0; return 0;
}
}
/* Alexey says: if a hook changes _data_ ... it can break SKB_LINEAR_ASSERT(*pskb);
original packet sitting in tcp queue and this is fatal */
if (skb_cloned(*skb)) {
struct sk_buff *nskb = skb_copy(*skb, GFP_ATOMIC);
if (!nskb) {
if (net_ratelimit())
printk("Out of memory cloning TCP packet\n");
return 0;
}
/* Rest of kernel will get very unhappy if we pass it
a suddenly-orphaned skbuff */
if ((*skb)->sk)
skb_set_owner_w(nskb, (*skb)->sk);
kfree_skb(*skb);
*skb = nskb;
}
/* skb may be copied !! */ iph = (*pskb)->nh.iph;
iph = (*skb)->nh.iph;
tcph = (void *)iph + iph->ihl*4; tcph = (void *)iph + iph->ihl*4;
data = (void *)tcph + tcph->doff*4;
if (rep_len != match_len) mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
/* move post-replacement */ match_offset, match_len, rep_buffer, rep_len);
memmove(data + match_offset + rep_len,
data + match_offset + match_len,
(*skb)->tail - (data + match_offset + match_len));
/* insert data from buffer */
memcpy(data + match_offset, rep_buffer, rep_len);
/* update skb info */
if (newlen > (*skb)->len) {
DEBUGP("ip_nat_mangle_tcp_packet: Extending packet by "
"%u to %u bytes\n", newlen - (*skb)->len, newlen);
skb_put(*skb, newlen - (*skb)->len);
} else {
DEBUGP("ip_nat_mangle_tcp_packet: Shrinking packet from "
"%u to %u bytes\n", (*skb)->len, newlen);
skb_trim(*skb, newlen);
}
/* fix checksum information */
iph->tot_len = htons(newlen);
(*skb)->csum = csum_partial((char *)tcph + tcph->doff*4,
newtcplen - tcph->doff*4, 0);
tcph->check = 0; tcph->check = 0;
tcph->check = tcp_v4_check(tcph, newtcplen, iph->saddr, iph->daddr, tcph->check = tcp_v4_check(tcph, (*pskb)->len - iph->ihl*4,
iph->saddr, iph->daddr,
csum_partial((char *)tcph, tcph->doff*4, csum_partial((char *)tcph, tcph->doff*4,
(*skb)->csum)); (*pskb)->csum));
ip_send_check(iph); adjust_tcp_sequence(ntohl(tcph->seq),
(int)match_len - (int)rep_len,
ct, ctinfo);
return 1; return 1;
} }
...@@ -220,219 +207,164 @@ ip_nat_mangle_tcp_packet(struct sk_buff **skb, ...@@ -220,219 +207,164 @@ ip_nat_mangle_tcp_packet(struct sk_buff **skb,
* should be fairly easy to do. * should be fairly easy to do.
*/ */
int int
ip_nat_mangle_udp_packet(struct sk_buff **skb, ip_nat_mangle_udp_packet(struct sk_buff **pskb,
struct ip_conntrack *ct, struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo, enum ip_conntrack_info ctinfo,
unsigned int match_offset, unsigned int match_offset,
unsigned int match_len, unsigned int match_len,
char *rep_buffer, const char *rep_buffer,
unsigned int rep_len) unsigned int rep_len)
{ {
struct iphdr *iph = (*skb)->nh.iph; struct iphdr *iph;
struct udphdr *udph = (void *)iph + iph->ihl * 4; struct udphdr *udph;
unsigned char *data; int need_csum = ((*pskb)->csum != 0);
u_int32_t udplen, newlen, newudplen;
udplen = (*skb)->len - iph->ihl*4;
newudplen = udplen - match_len + rep_len;
newlen = iph->ihl*4 + newudplen;
if (newlen > 65535) {
if (net_ratelimit())
printk("ip_nat_mangle_udp_packet: nat'ed packet "
"exceeds maximum packet size\n");
return 0;
}
if ((*skb)->len != newlen) { if (!skb_ip_make_writable(pskb, (*pskb)->len))
if (!ip_nat_resize_packet(skb, ct, ctinfo, newlen)) {
printk("resize_packet failed!!\n");
return 0; return 0;
}
}
/* Alexey says: if a hook changes _data_ ... it can break if (rep_len > match_len
original packet sitting in tcp queue and this is fatal */ && rep_len - match_len > skb_tailroom(*pskb)
if (skb_cloned(*skb)) { && !enlarge_skb(pskb, rep_len - match_len))
struct sk_buff *nskb = skb_copy(*skb, GFP_ATOMIC);
if (!nskb) {
if (net_ratelimit())
printk("Out of memory cloning TCP packet\n");
return 0; return 0;
}
/* Rest of kernel will get very unhappy if we pass it
a suddenly-orphaned skbuff */
if ((*skb)->sk)
skb_set_owner_w(nskb, (*skb)->sk);
kfree_skb(*skb);
*skb = nskb;
}
/* skb may be copied !! */ iph = (*pskb)->nh.iph;
iph = (*skb)->nh.iph;
udph = (void *)iph + iph->ihl*4; udph = (void *)iph + iph->ihl*4;
data = (void *)udph + sizeof(struct udphdr); mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
match_offset, match_len, rep_buffer, rep_len);
if (rep_len != match_len)
/* move post-replacement */
memmove(data + match_offset + rep_len,
data + match_offset + match_len,
(*skb)->tail - (data + match_offset + match_len));
/* insert data from buffer */
memcpy(data + match_offset, rep_buffer, rep_len);
/* update skb info */ /* update the length of the UDP packet */
if (newlen > (*skb)->len) { udph->len = htons((*pskb)->len - iph->ihl*4);
DEBUGP("ip_nat_mangle_udp_packet: Extending packet by "
"%u to %u bytes\n", newlen - (*skb)->len, newlen);
skb_put(*skb, newlen - (*skb)->len);
} else {
DEBUGP("ip_nat_mangle_udp_packet: Shrinking packet from "
"%u to %u bytes\n", (*skb)->len, newlen);
skb_trim(*skb, newlen);
}
/* update the length of the UDP and IP packets to the new values*/
udph->len = htons((*skb)->len - iph->ihl*4);
iph->tot_len = htons(newlen);
/* fix udp checksum if udp checksum was previously calculated */ /* fix udp checksum if udp checksum was previously calculated */
if ((*skb)->csum != 0) { if (need_csum) {
(*skb)->csum = csum_partial((char *)udph +
sizeof(struct udphdr),
newudplen - sizeof(struct udphdr),
0);
udph->check = 0; udph->check = 0;
udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, udph->check
newudplen, IPPROTO_UDP, = csum_tcpudp_magic(iph->saddr, iph->daddr,
(*pskb)->len - iph->ihl*4,
IPPROTO_UDP,
csum_partial((char *)udph, csum_partial((char *)udph,
sizeof(struct udphdr), sizeof(struct udphdr),
(*skb)->csum)); (*pskb)->csum));
} } else
(*pskb)->csum = 0;
ip_send_check(iph);
return 1; return 1;
} }
/* Adjust one found SACK option including checksum correction */ /* Adjust one found SACK option including checksum correction */
static void static void
sack_adjust(struct tcphdr *tcph, sack_adjust(struct sk_buff *skb,
unsigned char *ptr, struct tcphdr *tcph,
unsigned int sackoff,
unsigned int sackend,
struct ip_nat_seq *natseq) struct ip_nat_seq *natseq)
{ {
struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); while (sackoff < sackend) {
int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; struct tcp_sack_block *sack;
int i;
for (i = 0; i < num_sacks; i++, sp++) {
u_int32_t new_start_seq, new_end_seq; u_int32_t new_start_seq, new_end_seq;
if (after(ntohl(sp->start_seq) - natseq->offset_before, sack = (void *)skb->data + sackoff;
if (after(ntohl(sack->start_seq) - natseq->offset_before,
natseq->correction_pos)) natseq->correction_pos))
new_start_seq = ntohl(sp->start_seq) new_start_seq = ntohl(sack->start_seq)
- natseq->offset_after; - natseq->offset_after;
else else
new_start_seq = ntohl(sp->start_seq) new_start_seq = ntohl(sack->start_seq)
- natseq->offset_before; - natseq->offset_before;
new_start_seq = htonl(new_start_seq); new_start_seq = htonl(new_start_seq);
if (after(ntohl(sp->end_seq) - natseq->offset_before, if (after(ntohl(sack->end_seq) - natseq->offset_before,
natseq->correction_pos)) natseq->correction_pos))
new_end_seq = ntohl(sp->end_seq) new_end_seq = ntohl(sack->end_seq)
- natseq->offset_after; - natseq->offset_after;
else else
new_end_seq = ntohl(sp->end_seq) new_end_seq = ntohl(sack->end_seq)
- natseq->offset_before; - natseq->offset_before;
new_end_seq = htonl(new_end_seq); new_end_seq = htonl(new_end_seq);
DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
ntohl(sp->start_seq), new_start_seq, ntohl(sack->start_seq), new_start_seq,
ntohl(sp->end_seq), new_end_seq); ntohl(sack->end_seq), new_end_seq);
tcph->check = tcph->check =
ip_nat_cheat_check(~sp->start_seq, new_start_seq, ip_nat_cheat_check(~sack->start_seq, new_start_seq,
ip_nat_cheat_check(~sp->end_seq, ip_nat_cheat_check(~sack->end_seq,
new_end_seq, new_end_seq,
tcph->check)); tcph->check));
sack->start_seq = new_start_seq;
sp->start_seq = new_start_seq; sack->end_seq = new_end_seq;
sp->end_seq = new_end_seq; sackoff += sizeof(*sack);
} }
} }
/* TCP SACK sequence number adjustment */
/* TCP SACK sequence number adjustment, return 0 if sack found and adjusted */ static inline unsigned int
static inline int ip_nat_sack_adjust(struct sk_buff **pskb,
ip_nat_sack_adjust(struct sk_buff *skb, struct tcphdr *tcph,
struct ip_conntrack *ct, struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo) enum ip_conntrack_info ctinfo)
{ {
struct iphdr *iph; unsigned int dir, optoff, optend;
struct tcphdr *tcph;
unsigned char *ptr;
int length, dir, sack_adjusted = 0;
iph = skb->nh.iph; optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
tcph = (void *)iph + iph->ihl*4; optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
length = (tcph->doff*4)-sizeof(struct tcphdr);
ptr = (unsigned char *)(tcph+1); if (!skb_ip_make_writable(pskb, optend))
return 0;
dir = CTINFO2DIR(ctinfo); dir = CTINFO2DIR(ctinfo);
while (length > 0) { while (optoff < optend) {
int opcode = *ptr++; /* Usually: option, length. */
int opsize; unsigned char *op = (*pskb)->data + optoff;
switch (opcode) { switch (op[0]) {
case TCPOPT_EOL: case TCPOPT_EOL:
return !sack_adjusted; return 1;
case TCPOPT_NOP: case TCPOPT_NOP:
length--; optoff++;
continue; continue;
default: default:
opsize = *ptr++; /* no partial options */
if (opsize > length) /* no partial opts */ if (optoff + 1 == optend
return !sack_adjusted; || optoff + op[1] > optend
if (opcode == TCPOPT_SACK) { || op[1] < 2)
/* found SACK */ return 0;
if((opsize >= (TCPOLEN_SACK_BASE if (op[0] == TCPOPT_SACK
+TCPOLEN_SACK_PERBLOCK)) && && op[1] >= 2+TCPOLEN_SACK_PERBLOCK
!((opsize - TCPOLEN_SACK_BASE) && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
% TCPOLEN_SACK_PERBLOCK)) sack_adjust(*pskb, tcph, optoff+2,
sack_adjust(tcph, ptr-2, optoff+op[1],
&ct->nat.info.seq[!dir]); &ct->nat.info.seq[!dir]);
optoff += op[1];
sack_adjusted = 1;
} }
ptr += opsize-2;
length -= opsize;
} }
} return 1;
return !sack_adjusted;
} }
/* TCP sequence number adjustment */ /* TCP sequence number adjustment. Returns true or false. */
int int
ip_nat_seq_adjust(struct sk_buff *skb, ip_nat_seq_adjust(struct sk_buff **pskb,
struct ip_conntrack *ct, struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo) enum ip_conntrack_info ctinfo)
{ {
struct iphdr *iph;
struct tcphdr *tcph; struct tcphdr *tcph;
int dir, newseq, newack; int dir, newseq, newack;
struct ip_nat_seq *this_way, *other_way; struct ip_nat_seq *this_way, *other_way;
iph = skb->nh.iph;
tcph = (void *)iph + iph->ihl*4;
dir = CTINFO2DIR(ctinfo); dir = CTINFO2DIR(ctinfo);
this_way = &ct->nat.info.seq[dir]; this_way = &ct->nat.info.seq[dir];
other_way = &ct->nat.info.seq[!dir]; other_way = &ct->nat.info.seq[!dir];
/* No adjustments to make? Very common case. */
if (!this_way->offset_before && !this_way->offset_after
&& !other_way->offset_before && !other_way->offset_after)
return 1;
if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
return 0;
tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
if (after(ntohl(tcph->seq), this_way->correction_pos)) if (after(ntohl(tcph->seq), this_way->correction_pos))
newseq = ntohl(tcph->seq) + this_way->offset_after; newseq = ntohl(tcph->seq) + this_way->offset_after;
else else
...@@ -458,9 +390,7 @@ ip_nat_seq_adjust(struct sk_buff *skb, ...@@ -458,9 +390,7 @@ ip_nat_seq_adjust(struct sk_buff *skb,
tcph->seq = newseq; tcph->seq = newseq;
tcph->ack_seq = newack; tcph->ack_seq = newack;
ip_nat_sack_adjust(skb, ct, ctinfo); return ip_nat_sack_adjust(pskb, tcph, ct, ctinfo);
return 0;
} }
static inline int static inline int
......
...@@ -42,17 +42,24 @@ icmp_unique_tuple(struct ip_conntrack_tuple *tuple, ...@@ -42,17 +42,24 @@ icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
return 0; return 0;
} }
static void static int
icmp_manip_pkt(struct iphdr *iph, size_t len, icmp_manip_pkt(struct sk_buff **pskb,
unsigned int hdroff,
const struct ip_conntrack_manip *manip, const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype) enum ip_nat_manip_type maniptype)
{ {
struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); struct icmphdr *hdr;
if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
return 0;
hdr = (void *)(*pskb)->data + hdroff;
hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
manip->u.icmp.id, manip->u.icmp.id,
hdr->checksum); hdr->checksum);
hdr->un.echo.id = manip->u.icmp.id; hdr->un.echo.id = manip->u.icmp.id;
return 1;
} }
static unsigned int static unsigned int
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/netfilter_ipv4/ip_nat.h> #include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h> #include <linux/netfilter_ipv4/ip_nat_rule.h>
#include <linux/netfilter_ipv4/ip_nat_protocol.h> #include <linux/netfilter_ipv4/ip_nat_protocol.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
static int static int
tcp_in_range(const struct ip_conntrack_tuple *tuple, tcp_in_range(const struct ip_conntrack_tuple *tuple,
...@@ -73,36 +74,49 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple, ...@@ -73,36 +74,49 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
return 0; return 0;
} }
static void static int
tcp_manip_pkt(struct iphdr *iph, size_t len, tcp_manip_pkt(struct sk_buff **pskb,
unsigned int hdroff,
const struct ip_conntrack_manip *manip, const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype) enum ip_nat_manip_type maniptype)
{ {
struct tcphdr *hdr = (struct tcphdr *)((u_int32_t *)iph + iph->ihl); struct tcphdr *hdr;
u_int32_t oldip; u_int32_t oldip;
u_int16_t *portptr; u_int16_t *portptr, oldport;
int hdrsize = 8; /* TCP connection tracking guarantees this much */
/* this could be a inner header returned in icmp packet; in such
cases we cannot update the checksum field since it is outside of
the 8 bytes of transport layer headers we are guaranteed */
if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
hdrsize = sizeof(struct tcphdr);
if (!skb_ip_make_writable(pskb, hdroff + hdrsize))
return 0;
hdr = (void *)(*pskb)->data + hdroff;
if (maniptype == IP_NAT_MANIP_SRC) { if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */ /* Get rid of src ip and src pt */
oldip = iph->saddr; oldip = (*pskb)->nh.iph->saddr;
portptr = &hdr->source; portptr = &hdr->source;
} else { } else {
/* Get rid of dst ip and dst pt */ /* Get rid of dst ip and dst pt */
oldip = iph->daddr; oldip = (*pskb)->nh.iph->daddr;
portptr = &hdr->dest; portptr = &hdr->dest;
} }
/* this could be a inner header returned in icmp packet; in such oldport = *portptr;
cases we cannot update the checksum field since it is outside of *portptr = manip->u.tcp.port;
the 8 bytes of transport layer headers we are guaranteed */
if(((void *)&hdr->check + sizeof(hdr->check) - (void *)iph) <= len) { if (hdrsize < sizeof(*hdr))
return 1;
hdr->check = ip_nat_cheat_check(~oldip, manip->ip, hdr->check = ip_nat_cheat_check(~oldip, manip->ip,
ip_nat_cheat_check(*portptr ^ 0xFFFF, ip_nat_cheat_check(oldport ^ 0xFFFF,
manip->u.tcp.port, manip->u.tcp.port,
hdr->check)); hdr->check));
} return 1;
*portptr = manip->u.tcp.port;
} }
static unsigned int static unsigned int
......
...@@ -72,22 +72,27 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple, ...@@ -72,22 +72,27 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple,
return 0; return 0;
} }
static void static int
udp_manip_pkt(struct iphdr *iph, size_t len, udp_manip_pkt(struct sk_buff **pskb,
unsigned int hdroff,
const struct ip_conntrack_manip *manip, const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype) enum ip_nat_manip_type maniptype)
{ {
struct udphdr *hdr = (struct udphdr *)((u_int32_t *)iph + iph->ihl); struct udphdr *hdr;
u_int32_t oldip; u_int32_t oldip;
u_int16_t *portptr; u_int16_t *portptr;
if (!skb_ip_make_writable(pskb, hdroff + sizeof(hdr)))
return 0;
hdr = (void *)(*pskb)->data + hdroff;
if (maniptype == IP_NAT_MANIP_SRC) { if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */ /* Get rid of src ip and src pt */
oldip = iph->saddr; oldip = (*pskb)->nh.iph->saddr;
portptr = &hdr->source; portptr = &hdr->source;
} else { } else {
/* Get rid of dst ip and dst pt */ /* Get rid of dst ip and dst pt */
oldip = iph->daddr; oldip = (*pskb)->nh.iph->daddr;
portptr = &hdr->dest; portptr = &hdr->dest;
} }
if (hdr->check) /* 0 is a special case meaning no checksum */ if (hdr->check) /* 0 is a special case meaning no checksum */
...@@ -96,6 +101,7 @@ udp_manip_pkt(struct iphdr *iph, size_t len, ...@@ -96,6 +101,7 @@ udp_manip_pkt(struct iphdr *iph, size_t len,
manip->u.udp.port, manip->u.udp.port,
hdr->check)); hdr->check));
*portptr = manip->u.udp.port; *portptr = manip->u.udp.port;
return 1;
} }
static unsigned int static unsigned int
......
...@@ -29,12 +29,13 @@ static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple, ...@@ -29,12 +29,13 @@ static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
return 0; return 0;
} }
static void static int
unknown_manip_pkt(struct iphdr *iph, size_t len, unknown_manip_pkt(struct sk_buff **pskb,
unsigned int hdroff,
const struct ip_conntrack_manip *manip, const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype) enum ip_nat_manip_type maniptype)
{ {
return; return 1;
} }
static unsigned int static unsigned int
......
...@@ -71,10 +71,6 @@ ip_nat_fn(unsigned int hooknum, ...@@ -71,10 +71,6 @@ ip_nat_fn(unsigned int hooknum,
/* maniptype == SRC for postrouting. */ /* maniptype == SRC for postrouting. */
enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum); enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
/* FIXME: Push down to extensions --RR */
if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
return NF_DROP;
/* We never see fragments: conntrack defrags on pre-routing /* We never see fragments: conntrack defrags on pre-routing
and local-out, and ip_nat_out protects post-routing. */ and local-out, and ip_nat_out protects post-routing. */
IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
...@@ -95,12 +91,14 @@ ip_nat_fn(unsigned int hooknum, ...@@ -95,12 +91,14 @@ ip_nat_fn(unsigned int hooknum,
/* Exception: ICMP redirect to new connection (not in /* Exception: ICMP redirect to new connection (not in
hash table yet). We must not let this through, in hash table yet). We must not let this through, in
case we're doing NAT to the same network. */ case we're doing NAT to the same network. */
struct iphdr *iph = (*pskb)->nh.iph; if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
struct icmphdr *hdr = (struct icmphdr *) struct icmphdr hdr;
((u_int32_t *)iph + iph->ihl);
if (iph->protocol == IPPROTO_ICMP if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
&& hdr->type == ICMP_REDIRECT) &hdr, sizeof(hdr)) == 0
&& hdr.type == ICMP_REDIRECT)
return NF_DROP; return NF_DROP;
}
return NF_ACCEPT; return NF_ACCEPT;
} }
...@@ -108,8 +106,11 @@ ip_nat_fn(unsigned int hooknum, ...@@ -108,8 +106,11 @@ ip_nat_fn(unsigned int hooknum,
case IP_CT_RELATED: case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY: case IP_CT_RELATED+IP_CT_IS_REPLY:
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
return icmp_reply_translation(*pskb, ct, hooknum, if (!icmp_reply_translation(pskb, ct, hooknum,
CTINFO2DIR(ctinfo)); CTINFO2DIR(ctinfo)))
return NF_DROP;
else
return NF_ACCEPT;
} }
/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
case IP_CT_NEW: case IP_CT_NEW:
...@@ -174,10 +175,6 @@ ip_nat_out(unsigned int hooknum, ...@@ -174,10 +175,6 @@ ip_nat_out(unsigned int hooknum,
const struct net_device *out, const struct net_device *out,
int (*okfn)(struct sk_buff *)) int (*okfn)(struct sk_buff *))
{ {
/* FIXME: Push down to extensions --RR */
if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
return NF_DROP;
/* root is playing with raw sockets. */ /* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr) if ((*pskb)->len < sizeof(struct iphdr)
|| (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
...@@ -213,10 +210,6 @@ ip_nat_local_fn(unsigned int hooknum, ...@@ -213,10 +210,6 @@ ip_nat_local_fn(unsigned int hooknum,
u_int32_t saddr, daddr; u_int32_t saddr, daddr;
unsigned int ret; unsigned int ret;
/* FIXME: Push down to extensions --RR */
if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
return NF_DROP;
/* root is playing with raw sockets. */ /* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr) if ((*pskb)->len < sizeof(struct iphdr)
|| (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
...@@ -387,4 +380,5 @@ EXPORT_SYMBOL(ip_nat_cheat_check); ...@@ -387,4 +380,5 @@ EXPORT_SYMBOL(ip_nat_cheat_check);
EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
EXPORT_SYMBOL(ip_nat_mangle_udp_packet); EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
EXPORT_SYMBOL(ip_nat_used_tuple); EXPORT_SYMBOL(ip_nat_used_tuple);
EXPORT_SYMBOL(skb_ip_make_writable);
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
...@@ -57,9 +57,7 @@ tftp_nat_help(struct ip_conntrack *ct, ...@@ -57,9 +57,7 @@ tftp_nat_help(struct ip_conntrack *ct,
struct sk_buff **pskb) struct sk_buff **pskb)
{ {
int dir = CTINFO2DIR(ctinfo); int dir = CTINFO2DIR(ctinfo);
struct iphdr *iph = (*pskb)->nh.iph; struct tftphdr tftph;
struct udphdr *udph = (void *)iph + iph->ihl * 4;
struct tftphdr *tftph = (void *)udph + 8;
struct ip_conntrack_tuple repl; struct ip_conntrack_tuple repl;
if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL) if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL)
...@@ -71,7 +69,11 @@ tftp_nat_help(struct ip_conntrack *ct, ...@@ -71,7 +69,11 @@ tftp_nat_help(struct ip_conntrack *ct,
return NF_ACCEPT; return NF_ACCEPT;
} }
switch (ntohs(tftph->opcode)) { if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
&tftph, sizeof(tftph)) != 0)
return NF_DROP;
switch (ntohs(tftph.opcode)) {
/* RRQ and WRQ works the same way */ /* RRQ and WRQ works the same way */
case TFTP_OPCODE_READ: case TFTP_OPCODE_READ:
case TFTP_OPCODE_WRITE: case TFTP_OPCODE_WRITE:
...@@ -104,8 +106,10 @@ tftp_nat_expected(struct sk_buff **pskb, ...@@ -104,8 +106,10 @@ tftp_nat_expected(struct sk_buff **pskb,
#if 0 #if 0
const struct ip_conntrack_tuple *repl = const struct ip_conntrack_tuple *repl =
&master->tuplehash[IP_CT_DIR_REPLY].tuple; &master->tuplehash[IP_CT_DIR_REPLY].tuple;
struct iphdr *iph = (*pskb)->nh.iph; struct udphdr udph;
struct udphdr *udph = (void *)iph + iph->ihl*4;
if (skb_copy_bits(*pskb,(*pskb)->nh.iph->ihl*4,&udph,sizeof(udph))!=0)
return NF_DROP;
#endif #endif
IP_NF_ASSERT(info); IP_NF_ASSERT(info);
...@@ -119,8 +123,8 @@ tftp_nat_expected(struct sk_buff **pskb, ...@@ -119,8 +123,8 @@ tftp_nat_expected(struct sk_buff **pskb,
mr.range[0].min_ip = mr.range[0].max_ip = orig->dst.ip; mr.range[0].min_ip = mr.range[0].max_ip = orig->dst.ip;
DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u " DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u "
"newsrc: %u.%u.%u.%u\n", "newsrc: %u.%u.%u.%u\n",
NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph->source), NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph.source),
NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph->dest), NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph.dest),
NIPQUAD(orig->dst.ip)); NIPQUAD(orig->dst.ip));
} else { } else {
mr.range[0].min_ip = mr.range[0].max_ip = orig->src.ip; mr.range[0].min_ip = mr.range[0].max_ip = orig->src.ip;
...@@ -130,8 +134,8 @@ tftp_nat_expected(struct sk_buff **pskb, ...@@ -130,8 +134,8 @@ tftp_nat_expected(struct sk_buff **pskb,
DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u " DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u "
"newdst: %u.%u.%u.%u:%u\n", "newdst: %u.%u.%u.%u:%u\n",
NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph->source), NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph.source),
NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph->dest), NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph.dest),
NIPQUAD(orig->src.ip), ntohs(orig->src.u.udp.port)); NIPQUAD(orig->src.ip), ntohs(orig->src.u.udp.port));
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment