Commit 70d0a61c, authored by Rusty Russell, committed by David S. Miller

[NETFILTER]: Make NAT code handle non-linear skbs.

Makes the NAT code and all NAT helpers handle non-linear skbs.
Main trick is to introduce skb_ip_make_writable which handles all
the decloning, linearizing, etc.
parent 09f2947d
......@@ -16,10 +16,10 @@ extern unsigned int do_bindings(struct ip_conntrack *ct,
extern struct list_head protos;
extern unsigned int icmp_reply_translation(struct sk_buff *skb,
struct ip_conntrack *conntrack,
unsigned int hooknum,
int dir);
extern int icmp_reply_translation(struct sk_buff **pskb,
struct ip_conntrack *conntrack,
unsigned int hooknum,
int dir);
extern void replace_in_hashes(struct ip_conntrack *conntrack,
struct ip_nat_info *info);
......@@ -30,4 +30,10 @@ extern void place_in_hashes(struct ip_conntrack *conntrack,
extern struct ip_nat_protocol ip_nat_protocol_tcp;
extern struct ip_nat_protocol ip_nat_protocol_udp;
extern struct ip_nat_protocol ip_nat_protocol_icmp;
/* Call this before modifying an existing IP packet: ensures it is
modifiable and linear to the point you care about (writable_len).
Returns true or false. */
extern int skb_ip_make_writable(struct sk_buff **pskb,
unsigned int writable_len);
#endif /* _IP_NAT_CORE_H */
......@@ -43,22 +43,23 @@ extern struct list_head helpers;
extern int ip_nat_helper_register(struct ip_nat_helper *me);
extern void ip_nat_helper_unregister(struct ip_nat_helper *me);
/* These return true or false. */
extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int match_offset,
unsigned int match_len,
char *rep_buffer,
const char *rep_buffer,
unsigned int rep_len);
extern int ip_nat_mangle_udp_packet(struct sk_buff **skb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int match_offset,
unsigned int match_len,
char *rep_buffer,
const char *rep_buffer,
unsigned int rep_len);
extern int ip_nat_seq_adjust(struct sk_buff *skb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo);
extern void ip_nat_delete_sack(struct sk_buff *skb);
extern int ip_nat_seq_adjust(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo);
#endif
......@@ -18,10 +18,11 @@ struct ip_nat_protocol
unsigned int protonum;
/* Do a packet translation according to the ip_nat_proto_manip
* and manip type. */
void (*manip_pkt)(struct iphdr *iph, size_t len,
const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype);
* and manip type. Return true if succeeded. */
int (*manip_pkt)(struct sk_buff **pskb,
unsigned int hdroff,
const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype);
/* Is the manipable part of the tuple between min and max incl? */
int (*in_range)(const struct ip_conntrack_tuple *tuple,
......
......@@ -13,6 +13,8 @@
#include <net/icmp.h>
#include <net/ip.h>
#include <net/tcp.h> /* For tcp_prot in getorigdst */
#include <linux/icmp.h>
#include <linux/udp.h>
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
......@@ -698,14 +700,26 @@ void place_in_hashes(struct ip_conntrack *conntrack,
list_prepend(&byipsproto[ipsprotohash], &info->byipsproto);
}
static void
manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len,
/* Returns true if succeeded. */
static int
manip_pkt(u_int16_t proto,
struct sk_buff **pskb,
unsigned int iphdroff,
const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype,
__u32 *nfcache)
enum ip_nat_manip_type maniptype)
{
*nfcache |= NFC_ALTERED;
find_nat_proto(proto)->manip_pkt(iph, len, manip, maniptype);
struct iphdr *iph;
(*pskb)->nfcache |= NFC_ALTERED;
if (!skb_ip_make_writable(pskb, iphdroff+sizeof(iph)))
return 0;
iph = (void *)(*pskb)->data + iphdroff;
/* Manipulate protocol part. */
if (!find_nat_proto(proto)->manip_pkt(pskb, iphdroff + iph->ihl*4,
manip, maniptype))
return 0;
if (maniptype == IP_NAT_MANIP_SRC) {
iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip,
......@@ -716,17 +730,7 @@ manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len,
iph->check);
iph->daddr = manip->ip;
}
#if 0
if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
DEBUGP("IP: checksum on packet bad.\n");
if (proto == IPPROTO_TCP) {
void *th = (u_int32_t *)iph + iph->ihl;
if (tcp_v4_check(th, len - 4*iph->ihl, iph->saddr, iph->daddr,
csum_partial((char *)th, len-4*iph->ihl, 0)))
DEBUGP("TCP: checksum on packet bad\n");
}
#endif
return 1;
}
static inline int exp_for_packet(struct ip_conntrack_expect *exp,
......@@ -754,25 +758,13 @@ do_bindings(struct ip_conntrack *ct,
unsigned int i;
struct ip_nat_helper *helper;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
int is_tcp = (*pskb)->nh.iph->protocol == IPPROTO_TCP;
int proto = (*pskb)->nh.iph->protocol;
/* Need nat lock to protect against modification, but neither
conntrack (referenced) and helper (deleted with
synchronize_bh()) can vanish. */
READ_LOCK(&ip_nat_lock);
for (i = 0; i < info->num_manips; i++) {
/* raw socket (tcpdump) may have clone of incoming
skb: don't disturb it --RR */
if (skb_cloned(*pskb) && !(*pskb)->sk) {
struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
if (!nskb) {
READ_UNLOCK(&ip_nat_lock);
return NF_DROP;
}
kfree_skb(*pskb);
*pskb = nskb;
}
if (info->manips[i].direction == dir
&& info->manips[i].hooknum == hooknum) {
DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n",
......@@ -781,12 +773,12 @@ do_bindings(struct ip_conntrack *ct,
? "SRC" : "DST",
NIPQUAD(info->manips[i].manip.ip),
htons(info->manips[i].manip.u.all));
manip_pkt((*pskb)->nh.iph->protocol,
(*pskb)->nh.iph,
(*pskb)->len,
&info->manips[i].manip,
info->manips[i].maniptype,
&(*pskb)->nfcache);
if (manip_pkt(proto, pskb, 0,
&info->manips[i].manip,
info->manips[i].maniptype) < 0) {
READ_UNLOCK(&ip_nat_lock);
return NF_DROP;
}
}
}
helper = info->helper;
......@@ -839,12 +831,14 @@ do_bindings(struct ip_conntrack *ct,
/* Adjust sequence number only once per packet
* (helper is called at all hooks) */
if (is_tcp && (hooknum == NF_IP_POST_ROUTING
|| hooknum == NF_IP_LOCAL_IN)) {
if (proto == IPPROTO_TCP
&& (hooknum == NF_IP_POST_ROUTING
|| hooknum == NF_IP_LOCAL_IN)) {
DEBUGP("ip_nat_core: adjusting sequence number\n");
/* future: put this in a l4-proto specific function,
* and call this function here. */
ip_nat_seq_adjust(*pskb, ct, ctinfo);
if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
ret = NF_DROP;
}
return ret;
......@@ -855,39 +849,51 @@ do_bindings(struct ip_conntrack *ct,
/* not reached */
}
unsigned int
icmp_reply_translation(struct sk_buff *skb,
int
icmp_reply_translation(struct sk_buff **pskb,
struct ip_conntrack *conntrack,
unsigned int hooknum,
int dir)
{
struct iphdr *iph = skb->nh.iph;
struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl);
struct iphdr *inner = (struct iphdr *)(hdr + 1);
size_t datalen = skb->len - ((void *)inner - (void *)iph);
struct {
struct icmphdr icmp;
struct iphdr ip;
} *inside;
unsigned int i;
struct ip_nat_info *info = &conntrack->nat.info;
IP_NF_ASSERT(skb->len >= iph->ihl*4 + sizeof(struct icmphdr));
if (!skb_ip_make_writable(pskb,(*pskb)->nh.iph->ihl*4+sizeof(*inside)))
return 0;
inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
/* We're actually going to mangle it beyond trivial checksum
adjustment, so make sure the current checksum is correct. */
if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY
&& (u16)csum_fold(skb_checksum(*pskb, (*pskb)->nh.iph->ihl*4,
(*pskb)->len, 0)))
return 0;
/* Must be RELATED */
IP_NF_ASSERT(skb->nfct - (struct ip_conntrack *)skb->nfct->master
IP_NF_ASSERT((*pskb)->nfct
- (struct ip_conntrack *)(*pskb)->nfct->master
== IP_CT_RELATED
|| skb->nfct - (struct ip_conntrack *)skb->nfct->master
|| (*pskb)->nfct
- (struct ip_conntrack *)(*pskb)->nfct->master
== IP_CT_RELATED+IP_CT_IS_REPLY);
/* Redirects on non-null nats must be dropped, else they'll
start talking to each other without our translation, and be
confused... --RR */
if (hdr->type == ICMP_REDIRECT) {
if (inside->icmp.type == ICMP_REDIRECT) {
/* Don't care about races here. */
if (info->initialized
!= ((1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST))
|| info->num_manips != 0)
return NF_DROP;
return 0;
}
DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n",
skb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
*pskb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
/* Note: May not be from a NAT'd host, but probably safest to
do translation always as if it came from the host itself
(even though a "host unreachable" coming from the host
......@@ -918,11 +924,13 @@ icmp_reply_translation(struct sk_buff *skb,
? "DST" : "SRC",
NIPQUAD(info->manips[i].manip.ip),
ntohs(info->manips[i].manip.u.udp.port));
manip_pkt(inner->protocol, inner,
skb->len - ((void *)inner - (void *)iph),
&info->manips[i].manip,
!info->manips[i].maniptype,
&skb->nfcache);
if (manip_pkt(inside->ip.protocol, pskb,
(*pskb)->nh.iph->ihl*4
+ sizeof(inside->icmp),
&info->manips[i].manip,
!info->manips[i].maniptype) < 0)
goto unlock_fail;
/* Outer packet needs to have IP header NATed like
it's a reply. */
......@@ -932,22 +940,82 @@ icmp_reply_translation(struct sk_buff *skb,
info->manips[i].maniptype == IP_NAT_MANIP_SRC
? "SRC" : "DST",
NIPQUAD(info->manips[i].manip.ip));
manip_pkt(0, iph, skb->len,
&info->manips[i].manip,
info->manips[i].maniptype,
&skb->nfcache);
if (manip_pkt(0, pskb, 0,
&info->manips[i].manip,
info->manips[i].maniptype) < 0)
goto unlock_fail;
}
}
READ_UNLOCK(&ip_nat_lock);
/* Since we mangled inside ICMP packet, recalculate its
checksum from scratch. (Hence the handling of incorrect
checksums in conntrack, so we don't accidentally fix one.) */
hdr->checksum = 0;
hdr->checksum = ip_compute_csum((unsigned char *)hdr,
sizeof(*hdr) + datalen);
inside->icmp.checksum = 0;
inside->icmp.checksum = csum_fold(skb_checksum(*pskb,
(*pskb)->nh.iph->ihl*4,
(*pskb)->len, 0));
return 1;
return NF_ACCEPT;
unlock_fail:
READ_UNLOCK(&ip_nat_lock);
return 0;
}
/* Make the first writable_len bytes of an IP packet safe to modify.
 *
 * If the skb is shared or cloned, or the requested region reaches past
 * the transport header of a known protocol, the packet is replaced by a
 * private linear copy: the old skb is released with kfree_skb() and
 * *pskb is updated.  Callers must therefore reload every pointer they
 * derived from the skb before this call.
 *
 * Returns true on success; false if writable_len exceeds the packet
 * length, the copy cannot be allocated, or the pull fails.
 */
int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
{
	struct sk_buff *nskb;

	if (writable_len > (*pskb)->len)
		return 0;

	/* Not exclusive use of packet?  Must copy. */
	if (skb_shared(*pskb) || skb_cloned(*pskb))
		goto copy_skb;

	/* Alexey says IP hdr is always modifiable and linear, so ok. */
	if (writable_len <= (*pskb)->nh.iph->ihl*4)
		return 1;

	/* DaveM says protocol headers are also modifiable. */
	switch ((*pskb)->nh.iph->protocol) {
	case IPPROTO_TCP: {
		struct tcphdr hdr;

		/* Read the header by value: it may not be in the linear
		   area yet, and we need doff to know where it ends. */
		if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
				  &hdr, sizeof(hdr)) != 0)
			goto copy_skb;
		if (writable_len <= (*pskb)->nh.iph->ihl*4 + hdr.doff*4)
			goto pull_skb;
		goto copy_skb;
	}
	case IPPROTO_UDP:
		if (writable_len<=(*pskb)->nh.iph->ihl*4+sizeof(struct udphdr))
			goto pull_skb;
		goto copy_skb;
	case IPPROTO_ICMP:
		if (writable_len
		    <= (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr))
			goto pull_skb;
		goto copy_skb;
	/* Insert other cases here as desired */
	default:
		/* Unknown protocol: nothing beyond the IP header is known
		   to be modifiable, so take a private copy. */
		break;
	}

copy_skb:
	nskb = skb_copy(*pskb, GFP_ATOMIC);
	if (!nskb)
		return 0;
	BUG_ON(skb_is_nonlinear(nskb));

	/* Rest of kernel will get very unhappy if we pass it a
	   suddenly-orphaned skbuff */
	if ((*pskb)->sk)
		skb_set_owner_w(nskb, (*pskb)->sk);
	kfree_skb(*pskb);
	*pskb = nskb;
	return 1;

pull_skb:
	/* Only headers were requested: pulling them into the linear area
	   is enough, no copy needed. */
	return pskb_may_pull(*pskb, writable_len);
}
int __init ip_nat_init(void)
......
......@@ -46,14 +46,14 @@
#endif
DECLARE_LOCK(ip_nat_seqofs_lock);
static inline int
ip_nat_resize_packet(struct sk_buff **skb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
int new_size)
/* Setup TCP sequence correction given this change at this sequence */
static inline void
adjust_tcp_sequence(u32 seq,
int sizediff,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
{
struct iphdr *iph;
int dir;
struct ip_nat_seq *this_way, *other_way;
......@@ -65,52 +65,89 @@ ip_nat_resize_packet(struct sk_buff **skb,
this_way = &ct->nat.info.seq[dir];
other_way = &ct->nat.info.seq[!dir];
if (new_size > (*skb)->len + skb_tailroom(*skb)) {
struct sk_buff *newskb;
newskb = skb_copy_expand(*skb, skb_headroom(*skb),
new_size - (*skb)->len,
GFP_ATOMIC);
DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
DUMP_OFFSET(this_way);
if (!newskb) {
printk("ip_nat_resize_packet: oom\n");
return 0;
} else {
kfree_skb(*skb);
*skb = newskb;
}
LOCK_BH(&ip_nat_seqofs_lock);
/* SYN adjust. If it's uninitialized, or this is after last
* correction, record it: we don't handle more than one
* adjustment in the window, but do deal with common case of a
* retransmit */
if (this_way->offset_before == this_way->offset_after
|| before(this_way->correction_pos, seq)) {
this_way->correction_pos = seq;
this_way->offset_before = this_way->offset_after;
this_way->offset_after += sizediff;
}
UNLOCK_BH(&ip_nat_seqofs_lock);
iph = (*skb)->nh.iph;
if (iph->protocol == IPPROTO_TCP) {
struct tcphdr *tcph = (void *)iph + iph->ihl*4;
DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
DUMP_OFFSET(this_way);
LOCK_BH(&ip_nat_seqofs_lock);
/* SYN adjust. If it's uninitialized, or this is after last
* correction, record it: we don't handle more than one
* adjustment in the window, but do deal with common case of a
* retransmit */
if (this_way->offset_before == this_way->offset_after
|| before(this_way->correction_pos, ntohl(tcph->seq))) {
this_way->correction_pos = ntohl(tcph->seq);
this_way->offset_before = this_way->offset_after;
this_way->offset_after = (int32_t)
this_way->offset_before + new_size -
(*skb)->len;
}
DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
DUMP_OFFSET(this_way);
}
/* Frobs data inside this packet, which is linear. */
static void mangle_contents(struct sk_buff *skb,
unsigned int dataoff,
unsigned int match_offset,
unsigned int match_len,
const char *rep_buffer,
unsigned int rep_len)
{
unsigned char *data;
BUG_ON(skb_is_nonlinear(skb));
data = (unsigned char *)skb->nh.iph + dataoff;
UNLOCK_BH(&ip_nat_seqofs_lock);
/* move post-replacement */
memmove(data + match_offset + rep_len,
data + match_offset + match_len,
skb->tail - (data + match_offset + match_len));
DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
DUMP_OFFSET(this_way);
/* insert data from buffer */
memcpy(data + match_offset, rep_buffer, rep_len);
/* update skb info */
if (rep_len > match_len) {
DEBUGP("ip_nat_mangle_packet: Extending packet by "
"%u from %u bytes\n", rep_len - match_len,
skb->len);
skb_put(skb, rep_len - match_len);
} else {
DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
"%u from %u bytes\n", match_len - rep_len,
skb->len);
__skb_trim(skb, skb->len + rep_len - match_len);
}
return 1;
/* fix IP hdr checksum information */
skb->nh.iph->tot_len = htons(skb->len);
ip_send_check(skb->nh.iph);
skb->csum = csum_partial(data, skb->len - dataoff, 0);
}
/* Unusual, but possible case. */
static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
{
struct sk_buff *nskb;
if ((*pskb)->len + extra > 65535)
return 0;
nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
if (!nskb)
return 0;
/* Transfer socket to new skb. */
if ((*pskb)->sk)
skb_set_owner_w(nskb, (*pskb)->sk);
#ifdef CONFIG_NETFILTER_DEBUG
nskb->nf_debug = (*pskb)->nf_debug;
#endif
kfree_skb(*pskb);
*pskb = nskb;
return 1;
}
/* Generic function for mangling variable-length address changes inside
* NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
......@@ -121,91 +158,41 @@ ip_nat_resize_packet(struct sk_buff **skb,
*
* */
int
ip_nat_mangle_tcp_packet(struct sk_buff **skb,
ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int match_offset,
unsigned int match_len,
char *rep_buffer,
const char *rep_buffer,
unsigned int rep_len)
{
struct iphdr *iph = (*skb)->nh.iph;
struct iphdr *iph;
struct tcphdr *tcph;
unsigned char *data;
u_int32_t tcplen, newlen, newtcplen;
tcplen = (*skb)->len - iph->ihl*4;
newtcplen = tcplen - match_len + rep_len;
newlen = iph->ihl*4 + newtcplen;
if (newlen > 65535) {
if (net_ratelimit())
printk("ip_nat_mangle_tcp_packet: nat'ed packet "
"exceeds maximum packet size\n");
if (!skb_ip_make_writable(pskb, (*pskb)->len))
return 0;
}
if ((*skb)->len != newlen) {
if (!ip_nat_resize_packet(skb, ct, ctinfo, newlen)) {
printk("resize_packet failed!!\n");
return 0;
}
}
if (rep_len > match_len
&& rep_len - match_len > skb_tailroom(*pskb)
&& !enlarge_skb(pskb, rep_len - match_len))
return 0;
/* Alexey says: if a hook changes _data_ ... it can break
original packet sitting in tcp queue and this is fatal */
if (skb_cloned(*skb)) {
struct sk_buff *nskb = skb_copy(*skb, GFP_ATOMIC);
if (!nskb) {
if (net_ratelimit())
printk("Out of memory cloning TCP packet\n");
return 0;
}
/* Rest of kernel will get very unhappy if we pass it
a suddenly-orphaned skbuff */
if ((*skb)->sk)
skb_set_owner_w(nskb, (*skb)->sk);
kfree_skb(*skb);
*skb = nskb;
}
SKB_LINEAR_ASSERT(*pskb);
/* skb may be copied !! */
iph = (*skb)->nh.iph;
iph = (*pskb)->nh.iph;
tcph = (void *)iph + iph->ihl*4;
data = (void *)tcph + tcph->doff*4;
if (rep_len != match_len)
/* move post-replacement */
memmove(data + match_offset + rep_len,
data + match_offset + match_len,
(*skb)->tail - (data + match_offset + match_len));
/* insert data from buffer */
memcpy(data + match_offset, rep_buffer, rep_len);
/* update skb info */
if (newlen > (*skb)->len) {
DEBUGP("ip_nat_mangle_tcp_packet: Extending packet by "
"%u to %u bytes\n", newlen - (*skb)->len, newlen);
skb_put(*skb, newlen - (*skb)->len);
} else {
DEBUGP("ip_nat_mangle_tcp_packet: Shrinking packet from "
"%u to %u bytes\n", (*skb)->len, newlen);
skb_trim(*skb, newlen);
}
/* fix checksum information */
iph->tot_len = htons(newlen);
(*skb)->csum = csum_partial((char *)tcph + tcph->doff*4,
newtcplen - tcph->doff*4, 0);
mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
match_offset, match_len, rep_buffer, rep_len);
tcph->check = 0;
tcph->check = tcp_v4_check(tcph, newtcplen, iph->saddr, iph->daddr,
tcph->check = tcp_v4_check(tcph, (*pskb)->len - iph->ihl*4,
iph->saddr, iph->daddr,
csum_partial((char *)tcph, tcph->doff*4,
(*skb)->csum));
ip_send_check(iph);
(*pskb)->csum));
adjust_tcp_sequence(ntohl(tcph->seq),
(int)match_len - (int)rep_len,
ct, ctinfo);
return 1;
}
......@@ -220,219 +207,164 @@ ip_nat_mangle_tcp_packet(struct sk_buff **skb,
* should be fairly easy to do.
*/
int
ip_nat_mangle_udp_packet(struct sk_buff **skb,
ip_nat_mangle_udp_packet(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int match_offset,
unsigned int match_len,
char *rep_buffer,
const char *rep_buffer,
unsigned int rep_len)
{
struct iphdr *iph = (*skb)->nh.iph;
struct udphdr *udph = (void *)iph + iph->ihl * 4;
unsigned char *data;
u_int32_t udplen, newlen, newudplen;
udplen = (*skb)->len - iph->ihl*4;
newudplen = udplen - match_len + rep_len;
newlen = iph->ihl*4 + newudplen;
struct iphdr *iph;
struct udphdr *udph;
int need_csum = ((*pskb)->csum != 0);
if (newlen > 65535) {
if (net_ratelimit())
printk("ip_nat_mangle_udp_packet: nat'ed packet "
"exceeds maximum packet size\n");
if (!skb_ip_make_writable(pskb, (*pskb)->len))
return 0;
}
if ((*skb)->len != newlen) {
if (!ip_nat_resize_packet(skb, ct, ctinfo, newlen)) {
printk("resize_packet failed!!\n");
return 0;
}
}
/* Alexey says: if a hook changes _data_ ... it can break
original packet sitting in tcp queue and this is fatal */
if (skb_cloned(*skb)) {
struct sk_buff *nskb = skb_copy(*skb, GFP_ATOMIC);
if (!nskb) {
if (net_ratelimit())
printk("Out of memory cloning TCP packet\n");
return 0;
}
/* Rest of kernel will get very unhappy if we pass it
a suddenly-orphaned skbuff */
if ((*skb)->sk)
skb_set_owner_w(nskb, (*skb)->sk);
kfree_skb(*skb);
*skb = nskb;
}
if (rep_len > match_len
&& rep_len - match_len > skb_tailroom(*pskb)
&& !enlarge_skb(pskb, rep_len - match_len))
return 0;
/* skb may be copied !! */
iph = (*skb)->nh.iph;
iph = (*pskb)->nh.iph;
udph = (void *)iph + iph->ihl*4;
data = (void *)udph + sizeof(struct udphdr);
if (rep_len != match_len)
/* move post-replacement */
memmove(data + match_offset + rep_len,
data + match_offset + match_len,
(*skb)->tail - (data + match_offset + match_len));
mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
match_offset, match_len, rep_buffer, rep_len);
/* insert data from buffer */
memcpy(data + match_offset, rep_buffer, rep_len);
/* update skb info */
if (newlen > (*skb)->len) {
DEBUGP("ip_nat_mangle_udp_packet: Extending packet by "
"%u to %u bytes\n", newlen - (*skb)->len, newlen);
skb_put(*skb, newlen - (*skb)->len);
} else {
DEBUGP("ip_nat_mangle_udp_packet: Shrinking packet from "
"%u to %u bytes\n", (*skb)->len, newlen);
skb_trim(*skb, newlen);
}
/* update the length of the UDP and IP packets to the new values*/
udph->len = htons((*skb)->len - iph->ihl*4);
iph->tot_len = htons(newlen);
/* update the length of the UDP packet */
udph->len = htons((*pskb)->len - iph->ihl*4);
/* fix udp checksum if udp checksum was previously calculated */
if ((*skb)->csum != 0) {
(*skb)->csum = csum_partial((char *)udph +
sizeof(struct udphdr),
newudplen - sizeof(struct udphdr),
0);
if (need_csum) {
udph->check = 0;
udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
newudplen, IPPROTO_UDP,
csum_partial((char *)udph,
udph->check
= csum_tcpudp_magic(iph->saddr, iph->daddr,
(*pskb)->len - iph->ihl*4,
IPPROTO_UDP,
csum_partial((char *)udph,
sizeof(struct udphdr),
(*skb)->csum));
}
ip_send_check(iph);
(*pskb)->csum));
} else
(*pskb)->csum = 0;
return 1;
}
/* Adjust one found SACK option including checksum correction */
static void
sack_adjust(struct tcphdr *tcph,
unsigned char *ptr,
sack_adjust(struct sk_buff *skb,
struct tcphdr *tcph,
unsigned int sackoff,
unsigned int sackend,
struct ip_nat_seq *natseq)
{
struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
int i;
for (i = 0; i < num_sacks; i++, sp++) {
while (sackoff < sackend) {
struct tcp_sack_block *sack;
u_int32_t new_start_seq, new_end_seq;
if (after(ntohl(sp->start_seq) - natseq->offset_before,
sack = (void *)skb->data + sackoff;
if (after(ntohl(sack->start_seq) - natseq->offset_before,
natseq->correction_pos))
new_start_seq = ntohl(sp->start_seq)
new_start_seq = ntohl(sack->start_seq)
- natseq->offset_after;
else
new_start_seq = ntohl(sp->start_seq)
new_start_seq = ntohl(sack->start_seq)
- natseq->offset_before;
new_start_seq = htonl(new_start_seq);
if (after(ntohl(sp->end_seq) - natseq->offset_before,
if (after(ntohl(sack->end_seq) - natseq->offset_before,
natseq->correction_pos))
new_end_seq = ntohl(sp->end_seq)
new_end_seq = ntohl(sack->end_seq)
- natseq->offset_after;
else
new_end_seq = ntohl(sp->end_seq)
new_end_seq = ntohl(sack->end_seq)
- natseq->offset_before;
new_end_seq = htonl(new_end_seq);
DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
ntohl(sp->start_seq), new_start_seq,
ntohl(sp->end_seq), new_end_seq);
ntohl(sack->start_seq), new_start_seq,
ntohl(sack->end_seq), new_end_seq);
tcph->check =
ip_nat_cheat_check(~sp->start_seq, new_start_seq,
ip_nat_cheat_check(~sp->end_seq,
ip_nat_cheat_check(~sack->start_seq, new_start_seq,
ip_nat_cheat_check(~sack->end_seq,
new_end_seq,
tcph->check));
sp->start_seq = new_start_seq;
sp->end_seq = new_end_seq;
sack->start_seq = new_start_seq;
sack->end_seq = new_end_seq;
sackoff += sizeof(*sack);
}
}
/* TCP SACK sequence number adjustment, return 0 if sack found and adjusted */
static inline int
ip_nat_sack_adjust(struct sk_buff *skb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
/* TCP SACK sequence number adjustment */
static inline unsigned int
ip_nat_sack_adjust(struct sk_buff **pskb,
struct tcphdr *tcph,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
{
struct iphdr *iph;
struct tcphdr *tcph;
unsigned char *ptr;
int length, dir, sack_adjusted = 0;
unsigned int dir, optoff, optend;
iph = skb->nh.iph;
tcph = (void *)iph + iph->ihl*4;
length = (tcph->doff*4)-sizeof(struct tcphdr);
ptr = (unsigned char *)(tcph+1);
optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
if (!skb_ip_make_writable(pskb, optend))
return 0;
dir = CTINFO2DIR(ctinfo);
while (length > 0) {
int opcode = *ptr++;
int opsize;
while (optoff < optend) {
/* Usually: option, length. */
unsigned char *op = (*pskb)->data + optoff;
switch (opcode) {
switch (op[0]) {
case TCPOPT_EOL:
return !sack_adjusted;
return 1;
case TCPOPT_NOP:
length--;
optoff++;
continue;
default:
opsize = *ptr++;
if (opsize > length) /* no partial opts */
return !sack_adjusted;
if (opcode == TCPOPT_SACK) {
/* found SACK */
if((opsize >= (TCPOLEN_SACK_BASE
+TCPOLEN_SACK_PERBLOCK)) &&
!((opsize - TCPOLEN_SACK_BASE)
% TCPOLEN_SACK_PERBLOCK))
sack_adjust(tcph, ptr-2,
&ct->nat.info.seq[!dir]);
sack_adjusted = 1;
}
ptr += opsize-2;
length -= opsize;
/* no partial options */
if (optoff + 1 == optend
|| optoff + op[1] > optend
|| op[1] < 2)
return 0;
if (op[0] == TCPOPT_SACK
&& op[1] >= 2+TCPOLEN_SACK_PERBLOCK
&& ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
sack_adjust(*pskb, tcph, optoff+2,
optoff+op[1],
&ct->nat.info.seq[!dir]);
optoff += op[1];
}
}
return !sack_adjusted;
return 1;
}
/* TCP sequence number adjustment */
int
ip_nat_seq_adjust(struct sk_buff *skb,
/* TCP sequence number adjustment. Returns true or false. */
int
ip_nat_seq_adjust(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
{
struct iphdr *iph;
struct tcphdr *tcph;
int dir, newseq, newack;
struct ip_nat_seq *this_way, *other_way;
iph = skb->nh.iph;
tcph = (void *)iph + iph->ihl*4;
dir = CTINFO2DIR(ctinfo);
this_way = &ct->nat.info.seq[dir];
other_way = &ct->nat.info.seq[!dir];
/* No adjustments to make? Very common case. */
if (!this_way->offset_before && !this_way->offset_after
&& !other_way->offset_before && !other_way->offset_after)
return 1;
if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
return 0;
tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
if (after(ntohl(tcph->seq), this_way->correction_pos))
newseq = ntohl(tcph->seq) + this_way->offset_after;
else
......@@ -458,9 +390,7 @@ ip_nat_seq_adjust(struct sk_buff *skb,
tcph->seq = newseq;
tcph->ack_seq = newack;
ip_nat_sack_adjust(skb, ct, ctinfo);
return 0;
return ip_nat_sack_adjust(pskb, tcph, ct, ctinfo);
}
static inline int
......
......@@ -42,17 +42,24 @@ icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
return 0;
}
static void
icmp_manip_pkt(struct iphdr *iph, size_t len,
static int
icmp_manip_pkt(struct sk_buff **pskb,
unsigned int hdroff,
const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype)
{
struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl);
struct icmphdr *hdr;
if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
return 0;
hdr = (void *)(*pskb)->data + hdroff;
hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
manip->u.icmp.id,
hdr->checksum);
manip->u.icmp.id,
hdr->checksum);
hdr->un.echo.id = manip->u.icmp.id;
return 1;
}
static unsigned int
......
......@@ -7,6 +7,7 @@
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
#include <linux/netfilter_ipv4/ip_nat_protocol.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
static int
tcp_in_range(const struct ip_conntrack_tuple *tuple,
......@@ -73,36 +74,49 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
return 0;
}
static void
tcp_manip_pkt(struct iphdr *iph, size_t len,
static int
tcp_manip_pkt(struct sk_buff **pskb,
unsigned int hdroff,
const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype)
{
struct tcphdr *hdr = (struct tcphdr *)((u_int32_t *)iph + iph->ihl);
struct tcphdr *hdr;
u_int32_t oldip;
u_int16_t *portptr;
u_int16_t *portptr, oldport;
int hdrsize = 8; /* TCP connection tracking guarantees this much */
/* this could be a inner header returned in icmp packet; in such
cases we cannot update the checksum field since it is outside of
the 8 bytes of transport layer headers we are guaranteed */
if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
hdrsize = sizeof(struct tcphdr);
if (!skb_ip_make_writable(pskb, hdroff + hdrsize))
return 0;
hdr = (void *)(*pskb)->data + hdroff;
if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
oldip = iph->saddr;
oldip = (*pskb)->nh.iph->saddr;
portptr = &hdr->source;
} else {
/* Get rid of dst ip and dst pt */
oldip = iph->daddr;
oldip = (*pskb)->nh.iph->daddr;
portptr = &hdr->dest;
}
/* this could be a inner header returned in icmp packet; in such
cases we cannot update the checksum field since it is outside of
the 8 bytes of transport layer headers we are guaranteed */
if(((void *)&hdr->check + sizeof(hdr->check) - (void *)iph) <= len) {
hdr->check = ip_nat_cheat_check(~oldip, manip->ip,
ip_nat_cheat_check(*portptr ^ 0xFFFF,
oldport = *portptr;
*portptr = manip->u.tcp.port;
if (hdrsize < sizeof(*hdr))
return 1;
hdr->check = ip_nat_cheat_check(~oldip, manip->ip,
ip_nat_cheat_check(oldport ^ 0xFFFF,
manip->u.tcp.port,
hdr->check));
}
*portptr = manip->u.tcp.port;
return 1;
}
static unsigned int
......
......@@ -72,22 +72,27 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple,
return 0;
}
static void
udp_manip_pkt(struct iphdr *iph, size_t len,
static int
udp_manip_pkt(struct sk_buff **pskb,
unsigned int hdroff,
const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype)
{
struct udphdr *hdr = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
struct udphdr *hdr;
u_int32_t oldip;
u_int16_t *portptr;
if (!skb_ip_make_writable(pskb, hdroff + sizeof(hdr)))
return 0;
hdr = (void *)(*pskb)->data + hdroff;
if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
oldip = iph->saddr;
oldip = (*pskb)->nh.iph->saddr;
portptr = &hdr->source;
} else {
/* Get rid of dst ip and dst pt */
oldip = iph->daddr;
oldip = (*pskb)->nh.iph->daddr;
portptr = &hdr->dest;
}
if (hdr->check) /* 0 is a special case meaning no checksum */
......@@ -96,6 +101,7 @@ udp_manip_pkt(struct iphdr *iph, size_t len,
manip->u.udp.port,
hdr->check));
*portptr = manip->u.udp.port;
return 1;
}
static unsigned int
......
......@@ -29,12 +29,13 @@ static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
return 0;
}
static void
unknown_manip_pkt(struct iphdr *iph, size_t len,
static int
unknown_manip_pkt(struct sk_buff **pskb,
unsigned int hdroff,
const struct ip_conntrack_manip *manip,
enum ip_nat_manip_type maniptype)
{
return;
return 1;
}
static unsigned int
......
......@@ -71,10 +71,6 @@ ip_nat_fn(unsigned int hooknum,
/* maniptype == SRC for postrouting. */
enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
/* FIXME: Push down to extensions --RR */
if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
return NF_DROP;
/* We never see fragments: conntrack defrags on pre-routing
and local-out, and ip_nat_out protects post-routing. */
IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
......@@ -95,12 +91,14 @@ ip_nat_fn(unsigned int hooknum,
/* Exception: ICMP redirect to new connection (not in
hash table yet). We must not let this through, in
case we're doing NAT to the same network. */
struct iphdr *iph = (*pskb)->nh.iph;
struct icmphdr *hdr = (struct icmphdr *)
((u_int32_t *)iph + iph->ihl);
if (iph->protocol == IPPROTO_ICMP
&& hdr->type == ICMP_REDIRECT)
return NF_DROP;
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
struct icmphdr hdr;
if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
&hdr, sizeof(hdr)) == 0
&& hdr.type == ICMP_REDIRECT)
return NF_DROP;
}
return NF_ACCEPT;
}
......@@ -108,8 +106,11 @@ ip_nat_fn(unsigned int hooknum,
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
return icmp_reply_translation(*pskb, ct, hooknum,
CTINFO2DIR(ctinfo));
if (!icmp_reply_translation(pskb, ct, hooknum,
CTINFO2DIR(ctinfo)))
return NF_DROP;
else
return NF_ACCEPT;
}
/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
case IP_CT_NEW:
......@@ -174,10 +175,6 @@ ip_nat_out(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* FIXME: Push down to extensions --RR */
if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
return NF_DROP;
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
|| (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
......@@ -213,10 +210,6 @@ ip_nat_local_fn(unsigned int hooknum,
u_int32_t saddr, daddr;
unsigned int ret;
/* FIXME: Push down to extensions --RR */
if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
return NF_DROP;
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
|| (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
......@@ -387,4 +380,5 @@ EXPORT_SYMBOL(ip_nat_cheat_check);
EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
EXPORT_SYMBOL(ip_nat_used_tuple);
EXPORT_SYMBOL(skb_ip_make_writable);
MODULE_LICENSE("GPL");
......@@ -57,9 +57,7 @@ tftp_nat_help(struct ip_conntrack *ct,
struct sk_buff **pskb)
{
int dir = CTINFO2DIR(ctinfo);
struct iphdr *iph = (*pskb)->nh.iph;
struct udphdr *udph = (void *)iph + iph->ihl * 4;
struct tftphdr *tftph = (void *)udph + 8;
struct tftphdr tftph;
struct ip_conntrack_tuple repl;
if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL)
......@@ -71,7 +69,11 @@ tftp_nat_help(struct ip_conntrack *ct,
return NF_ACCEPT;
}
switch (ntohs(tftph->opcode)) {
if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
&tftph, sizeof(tftph)) != 0)
return NF_DROP;
switch (ntohs(tftph.opcode)) {
/* RRQ and WRQ works the same way */
case TFTP_OPCODE_READ:
case TFTP_OPCODE_WRITE:
......@@ -104,8 +106,10 @@ tftp_nat_expected(struct sk_buff **pskb,
#if 0
const struct ip_conntrack_tuple *repl =
&master->tuplehash[IP_CT_DIR_REPLY].tuple;
struct iphdr *iph = (*pskb)->nh.iph;
struct udphdr *udph = (void *)iph + iph->ihl*4;
struct udphdr udph;
if (skb_copy_bits(*pskb,(*pskb)->nh.iph->ihl*4,&udph,sizeof(udph))!=0)
return NF_DROP;
#endif
IP_NF_ASSERT(info);
......@@ -119,8 +123,8 @@ tftp_nat_expected(struct sk_buff **pskb,
mr.range[0].min_ip = mr.range[0].max_ip = orig->dst.ip;
DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u "
"newsrc: %u.%u.%u.%u\n",
NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph->source),
NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph->dest),
NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph.source),
NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph.dest),
NIPQUAD(orig->dst.ip));
} else {
mr.range[0].min_ip = mr.range[0].max_ip = orig->src.ip;
......@@ -130,8 +134,8 @@ tftp_nat_expected(struct sk_buff **pskb,
DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u "
"newdst: %u.%u.%u.%u:%u\n",
NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph->source),
NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph->dest),
NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph.source),
NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph.dest),
NIPQUAD(orig->src.ip), ntohs(orig->src.u.udp.port));
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment