Commit 89d5e232 authored by David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Conflicts:
	net/netfilter/nf_conntrack_proto_tcp.c

The conflict was caused by overlapping changes, both fixing the code
to use an "s32" to hold the value returned by NAT_OFFSET().

Pablo Neira Ayuso says:

====================
The following batch contains Netfilter/IPVS updates for your net-next tree.
More specifically, they are:

* Trivial typo fix in xt_addrtype, from Phil Oester.

* Remove net_ratelimit in the conntrack logging, for consistency with other
  logging subsystems, from Patrick McHardy.

* Remove unneeded includes from the recently added xt_connlabel support, from
  Florian Westphal.

* Allow updating conntracks via nfqueue; NFQA_CFG_F_CONNTRACK is not needed
  for this, from Florian Westphal.

* Remove tproxy core, now that we have socket early demux, from Florian
  Westphal.

* A couple of patches to refactor conntrack event reporting to save a good
  bunch of lines, from Florian Westphal.

* Fix missing locking in NAT sequence adjustment; it has not manifested in
  any known bug so far, from Patrick McHardy.

* Change the sequence number adjustment variables to 32 bits, to delay a
  possible early overflow in long-standing connections, also from Patrick.

* Cosmetic cleanups for IPVS, from Dragos Foianu.

* Fix a possible NULL dereference in the IPVS SH scheduler, from Daniel
  Borkmann.

* Allow attaching conntrack expectations via nfqueue. Before this patch, you
  had to use ctnetlink instead; this way we save a conntrack lookup.

* Export xt_rpfilter and xt_HMARK header files, from Nicolas Dichtel.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 15ec80f5 38c67328
......@@ -2,9 +2,8 @@ Transparent proxy support
=========================
This feature adds Linux 2.2-like transparent proxy support to current kernels.
To use it, enable NETFILTER_TPROXY, the socket match and the TPROXY target in
your kernel config. You will need policy routing too, so be sure to enable that
as well.
To use it, enable the socket match and the TPROXY target in your kernel config.
You will need policy routing too, so be sure to enable that as well.
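As an illustration, a minimal ruleset in the spirit of this document (the
DIVERT chain name, fwmark value and routing table number are arbitrary
example choices) uses the socket match to pick up packets owned by non-local
sockets and policy routing to deliver them to the local stack:

	iptables -t mangle -N DIVERT
	iptables -t mangle -A PREROUTING -p tcp -m socket -j DIVERT
	iptables -t mangle -A DIVERT -j MARK --set-mark 1
	iptables -t mangle -A DIVERT -j ACCEPT

	ip rule add fwmark 1 lookup 100
	ip route add local 0.0.0.0/0 dev lo table 100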
1. Making non-local sockets work
......
......@@ -314,8 +314,8 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu;
extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
extern void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
struct nf_conn;
......@@ -325,12 +325,14 @@ struct nfq_ct_hook {
size_t (*build_size)(const struct nf_conn *ct);
int (*build)(struct sk_buff *skb, struct nf_conn *ct);
int (*parse)(const struct nlattr *attr, struct nf_conn *ct);
int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct,
u32 portid, u32 report);
};
extern struct nfq_ct_hook __rcu *nfq_ct_hook;
struct nfq_ct_nat_hook {
void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct,
u32 ctinfo, int off);
u32 ctinfo, s32 off);
};
extern struct nfq_ct_nat_hook __rcu *nfq_ct_nat_hook;
#else
......
......@@ -181,8 +181,7 @@ __nf_conntrack_find(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple);
extern int nf_conntrack_hash_check_insert(struct nf_conn *ct);
extern void nf_ct_delete_from_lists(struct nf_conn *ct);
extern void nf_ct_dying_timeout(struct nf_conn *ct);
bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report);
extern void nf_conntrack_flush_report(struct net *net, u32 portid, int report);
......@@ -235,7 +234,7 @@ static inline bool nf_ct_kill(struct nf_conn *ct)
}
/* These are for NAT. Icky. */
extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq);
......@@ -249,7 +248,9 @@ extern void nf_ct_untracked_status_or(unsigned long bits);
/* Iterate over all conntracks: if iter returns true, it's deleted. */
extern void
nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data);
nf_ct_iterate_cleanup(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report);
extern void nf_conntrack_free(struct nf_conn *ct);
extern struct nf_conn *
nf_conntrack_alloc(struct net *net, u16 zone,
......
......@@ -148,17 +148,10 @@ extern int nf_ct_port_nlattr_tuple_size(void);
extern const struct nla_policy nf_ct_port_nla_policy[];
#ifdef CONFIG_SYSCTL
#ifdef DEBUG_INVALID_PACKETS
#define LOG_INVALID(net, proto) \
((net)->ct.sysctl_log_invalid == (proto) || \
(net)->ct.sysctl_log_invalid == IPPROTO_RAW)
#else
#define LOG_INVALID(net, proto) \
(((net)->ct.sysctl_log_invalid == (proto) || \
(net)->ct.sysctl_log_invalid == IPPROTO_RAW) \
&& net_ratelimit())
#endif
#else
static inline int LOG_INVALID(struct net *net, int proto) { return 0; }
#endif /* CONFIG_SYSCTL */
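LOG_INVALID() gates logging of packets that fail conntrack sanity checks on
the nf_conntrack_log_invalid sysctl, with IPPROTO_RAW (255) acting as a
match-all wildcard; with net_ratelimit() removed from the macro, rate
limiting is left to the logging backend. A typical call site in a conntrack
protocol handler looks roughly like this fragment (based on
nf_conntrack_proto_tcp.c; net, pf and skb come from the caller):

	if (LOG_INVALID(net, IPPROTO_TCP))
		nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
			      "nf_ct_tcp: invalid packet ignored ");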
......
......@@ -19,7 +19,7 @@ struct nf_nat_seq {
u_int32_t correction_pos;
/* sequence number offset before and after last modification */
int16_t offset_before, offset_after;
int32_t offset_before, offset_after;
};
#include <linux/list.h>
......
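The two offsets accumulate across every packet a NAT helper mangles on a
connection, so 16 bits can overflow surprisingly early: a helper that grows
each rewritten packet by 40 bytes wraps an s16 after roughly 32767 / 40 ≈ 819
packets, well within the lifetime of a long-standing FTP or SIP control
connection; s32 pushes that limit out by a factor of 65536. A standalone
userspace C sketch (hypothetical, for illustration only) of the wraparound:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int16_t off16 = 0;	/* the old offset_before/offset_after width */
		int32_t off32 = 0;	/* the new width */
		int i;

		/* assume each mangled packet grows the stream by 40 bytes */
		for (i = 0; i < 1000; i++) {
			off16 += 40;	/* wraps on two's-complement targets */
			off32 += 40;
		}
		printf("s16: %d, s32: %d\n", off16, off32);
		/* prints: s16: -25536, s32: 40000 */
		return 0;
	}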
......@@ -41,7 +41,7 @@ extern int nf_nat_mangle_udp_packet(struct sk_buff *skb,
extern void nf_nat_set_seq_adjust(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
__be32 seq, s16 off);
__be32 seq, s32 off);
extern int nf_nat_seq_adjust(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
......@@ -56,11 +56,11 @@ extern int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
extern void nf_nat_follow_master(struct nf_conn *ct,
struct nf_conntrack_expect *this);
extern s16 nf_nat_get_offset(const struct nf_conn *ct,
extern s32 nf_nat_get_offset(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq);
extern void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
u32 dir, int off);
u32 dir, s32 off);
#endif
#ifndef _NF_TPROXY_CORE_H
#define _NF_TPROXY_CORE_H
#include <linux/types.h>
#include <linux/in.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/tcp.h>
#define NFT_LOOKUP_ANY 0
#define NFT_LOOKUP_LISTENER 1
#define NFT_LOOKUP_ESTABLISHED 2
/* look up and get a reference to a matching socket */
/* This function is used by the 'TPROXY' target and the 'socket'
* match. The following lookups are supported:
*
* Explicit TProxy target rule
* ===========================
*
* This is used when the user wants to intercept a connection matching
* an explicit iptables rule. In this case the sockets are assumed
* matching in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple, it is returned, assuming the redirection
* already took place and we process a packet belonging to an
* established connection
*
* - match: if there's a listening socket matching the redirection
* (e.g. on-port & on-ip of the connection), it is returned,
* regardless if it was bound to 0.0.0.0 or an explicit
* address. The reasoning is that if there's an explicit rule, it
* does not really matter if the listener is bound to an interface
* or to 0. The user already stated that he wants redirection
* (since he added the rule).
*
* "socket" match based redirection (no specific rule)
* ===================================================
*
* There are connections with dynamic endpoints (e.g. FTP data
* connection) that the user is unable to add explicit rules
* for. These are taken care of by a generic "socket" rule. It is
* assumed that the proxy application is trusted to open such
* connections without explicit iptables rule (except of course the
* generic 'socket' rule). In this case the following sockets are
* matched in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple
*
* - match: if there's a non-zero bound listener (possibly with a
* non-local address) We don't accept zero-bound listeners, since
* then local services could intercept traffic going through the
* box.
*
* Please note that there's an overlap between what a TPROXY target
* and a socket match will match. Normally if you have both rules the
* "socket" match will be the first one, effectively all packets
* belonging to established connections going through that one.
*/
static inline struct sock *
nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in, int lookup_type)
{
struct sock *sk;
/* look up socket */
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_ANY:
sk = __inet_lookup(net, &tcp_hashinfo,
saddr, sport, daddr, dport,
in->ifindex);
break;
case NFT_LOOKUP_LISTENER:
sk = inet_lookup_listener(net, &tcp_hashinfo,
saddr, sport,
daddr, dport,
in->ifindex);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too */
break;
case NFT_LOOKUP_ESTABLISHED:
sk = inet_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, dport,
in->ifindex);
break;
default:
WARN_ON(1);
sk = NULL;
break;
}
break;
case IPPROTO_UDP:
sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk && lookup_type != NFT_LOOKUP_ANY) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too */
if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NFT_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
return sk;
}
#if IS_ENABLED(CONFIG_IPV6)
static inline struct sock *
nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in, int lookup_type)
{
struct sock *sk;
/* look up socket */
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_ANY:
sk = inet6_lookup(net, &tcp_hashinfo,
saddr, sport, daddr, dport,
in->ifindex);
break;
case NFT_LOOKUP_LISTENER:
sk = inet6_lookup_listener(net, &tcp_hashinfo,
saddr, sport,
daddr, ntohs(dport),
in->ifindex);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too */
break;
case NFT_LOOKUP_ESTABLISHED:
sk = __inet6_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, ntohs(dport),
in->ifindex);
break;
default:
WARN_ON(1);
sk = NULL;
break;
}
break;
case IPPROTO_UDP:
sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk && lookup_type != NFT_LOOKUP_ANY) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too */
if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NFT_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
return sk;
}
#endif
/* assign a socket to the skb -- consumes sk */
void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk);
#endif
......@@ -15,6 +15,8 @@ int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo);
void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, int diff);
int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
u32 portid, u32 report);
#else
inline struct nf_conn *
nfqnl_ct_get(struct sk_buff *entskb, size_t *size, enum ip_conntrack_info *ctinfo)
......@@ -39,5 +41,11 @@ inline void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, int diff)
{
}
inline int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
u32 portid, u32 report)
{
return 0;
}
#endif /* NF_CONNTRACK */
#endif
......@@ -22,6 +22,7 @@ header-y += xt_CONNMARK.h
header-y += xt_CONNSECMARK.h
header-y += xt_CT.h
header-y += xt_DSCP.h
header-y += xt_HMARK.h
header-y += xt_IDLETIMER.h
header-y += xt_LED.h
header-y += xt_LOG.h
......@@ -68,6 +69,7 @@ header-y += xt_quota.h
header-y += xt_rateest.h
header-y += xt_realm.h
header-y += xt_recent.h
header-y += xt_rpfilter.h
header-y += xt_sctp.h
header-y += xt_set.h
header-y += xt_socket.h
......
......@@ -46,6 +46,7 @@ enum nfqnl_attr_type {
NFQA_CT_INFO, /* enum ip_conntrack_info */
NFQA_CAP_LEN, /* __u32 length of captured packet */
NFQA_SKB_INFO, /* __u32 skb meta information */
NFQA_EXP, /* nf_conntrack_netlink.h */
__NFQA_MAX
};
......
......@@ -118,7 +118,7 @@ static int masq_device_event(struct notifier_block *this,
NF_CT_ASSERT(dev->ifindex != 0);
nf_ct_iterate_cleanup(net, device_cmp,
(void *)(long)dev->ifindex);
(void *)(long)dev->ifindex, 0, 0);
}
return NOTIFY_DONE;
......
......@@ -76,7 +76,7 @@ static int masq_device_event(struct notifier_block *this,
if (event == NETDEV_DOWN)
nf_ct_iterate_cleanup(net, device_cmp,
(void *)(long)dev->ifindex);
(void *)(long)dev->ifindex, 0, 0);
return NOTIFY_DONE;
}
......
......@@ -410,20 +410,6 @@ config NF_NAT_TFTP
endif # NF_CONNTRACK
# transparent proxy support
config NETFILTER_TPROXY
tristate "Transparent proxying support"
depends on IP_NF_MANGLE
depends on NETFILTER_ADVANCED
help
This option enables transparent proxying support, that is,
support for handling non-locally bound IPv4 TCP and UDP sockets.
For it to work you will have to configure certain iptables rules
and use policy routing. For more information on how to set it up
see Documentation/networking/tproxy.txt.
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
......@@ -720,10 +706,10 @@ config NETFILTER_XT_TARGET_TEE
this clone be rerouted to another nexthop.
config NETFILTER_XT_TARGET_TPROXY
tristate '"TPROXY" target support'
depends on NETFILTER_TPROXY
tristate '"TPROXY" target transparent proxying support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
......@@ -731,6 +717,9 @@ config NETFILTER_XT_TARGET_TPROXY
REDIRECT. It can only be used in the mangle table and is useful
to redirect traffic to a transparent proxy. It does _not_ depend
on Netfilter connection tracking and NAT, unlike REDIRECT.
For it to work you will have to configure certain iptables rules
and use policy routing. For more information on how to set it up
see Documentation/networking/tproxy.txt.
To compile it as a module, choose M here. If unsure, say N.
......@@ -1180,7 +1169,6 @@ config NETFILTER_XT_MATCH_SCTP
config NETFILTER_XT_MATCH_SOCKET
tristate '"socket" match support'
depends on NETFILTER_TPROXY
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
depends on !NF_CONNTRACK || NF_CONNTRACK
......
......@@ -61,9 +61,6 @@ obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
# transparent proxy support
obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
......
......@@ -234,12 +234,13 @@ EXPORT_SYMBOL(skb_make_writable);
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
__rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);
void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{
void (*attach)(struct sk_buff *, struct sk_buff *);
void (*attach)(struct sk_buff *, const struct sk_buff *);
if (skb->nfct) {
rcu_read_lock();
......
......@@ -414,7 +414,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
spin_lock_bh(&svc->sched_lock);
tbl->dead = 1;
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblcr_free(en);
}
......@@ -440,7 +440,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
struct ip_vs_lblcr_entry *en;
struct hlist_node *next;
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
spin_lock(&svc->sched_lock);
......@@ -495,7 +495,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
if (goal > tbl->max_size/2)
goal = tbl->max_size/2;
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
spin_lock(&svc->sched_lock);
......@@ -536,7 +536,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
/*
* Initialize the hash buckets
*/
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
INIT_HLIST_HEAD(&tbl->bucket[i]);
}
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
......
......@@ -269,14 +269,20 @@ ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
switch (iph->protocol) {
case IPPROTO_TCP:
th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
if (unlikely(th == NULL))
return 0;
port = th->source;
break;
case IPPROTO_UDP:
uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
if (unlikely(uh == NULL))
return 0;
port = uh->source;
break;
case IPPROTO_SCTP:
sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
if (unlikely(sh == NULL))
return 0;
port = sh->source;
break;
default:
......
......@@ -238,7 +238,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_conntrack_free(ct);
}
void nf_ct_delete_from_lists(struct nf_conn *ct)
static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
......@@ -253,7 +253,6 @@ void nf_ct_delete_from_lists(struct nf_conn *ct)
&net->ct.dying);
spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
static void death_by_event(unsigned long ul_conntrack)
{
......@@ -275,7 +274,7 @@ static void death_by_event(unsigned long ul_conntrack)
nf_ct_put(ct);
}
void nf_ct_dying_timeout(struct nf_conn *ct)
static void nf_ct_dying_timeout(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
......@@ -288,27 +287,33 @@ void nf_ct_dying_timeout(struct nf_conn *ct)
(prandom_u32() % net->ct.sysctl_events_retry_timeout);
add_timer(&ecache->timeout);
}
EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
static void death_by_timeout(unsigned long ul_conntrack)
bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
struct nf_conn *ct = (void *)ul_conntrack;
struct nf_conn_tstamp *tstamp;
tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_to_ns(ktime_get_real());
if (!test_bit(IPS_DYING_BIT, &ct->status) &&
unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
if (!nf_ct_is_dying(ct) &&
unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct,
portid, report) < 0)) {
/* destroy event was not delivered */
nf_ct_delete_from_lists(ct);
nf_ct_dying_timeout(ct);
return;
return false;
}
set_bit(IPS_DYING_BIT, &ct->status);
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
return true;
}
EXPORT_SYMBOL_GPL(nf_ct_delete);
static void death_by_timeout(unsigned long ul_conntrack)
{
nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
}
/*
......@@ -643,10 +648,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
return dropped;
if (del_timer(&ct->timeout)) {
death_by_timeout((unsigned long)ct);
/* Check if we indeed killed this entry. Reliable event
delivery may have inserted it into the dying list. */
if (test_bit(IPS_DYING_BIT, &ct->status)) {
if (nf_ct_delete(ct, 0, 0)) {
dropped = 1;
NF_CT_STAT_INC_ATOMIC(net, early_drop);
}
......@@ -1192,7 +1194,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
#endif
/* Used by ipt_REJECT and ip6t_REJECT. */
static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
......@@ -1244,7 +1246,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
void nf_ct_iterate_cleanup(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data)
void *data, u32 portid, int report)
{
struct nf_conn *ct;
unsigned int bucket = 0;
......@@ -1252,7 +1254,8 @@ void nf_ct_iterate_cleanup(struct net *net,
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daisies... */
if (del_timer(&ct->timeout))
death_by_timeout((unsigned long)ct);
nf_ct_delete(ct, portid, report);
/* ... else the timer will get him soon. */
nf_ct_put(ct);
......@@ -1260,30 +1263,6 @@ void nf_ct_iterate_cleanup(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
struct __nf_ct_flush_report {
u32 portid;
int report;
};
static int kill_report(struct nf_conn *i, void *data)
{
struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
struct nf_conn_tstamp *tstamp;
tstamp = nf_conn_tstamp_find(i);
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_to_ns(ktime_get_real());
/* If we fail to deliver the event, death_by_timeout() will retry */
if (nf_conntrack_event_report(IPCT_DESTROY, i,
fr->portid, fr->report) < 0)
return 1;
/* Avoid the delivery of the destroy event in death_by_timeout(). */
set_bit(IPS_DYING_BIT, &i->status);
return 1;
}
static int kill_all(struct nf_conn *i, void *data)
{
return 1;
......@@ -1301,11 +1280,7 @@ EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
{
struct __nf_ct_flush_report fr = {
.portid = portid,
.report = report,
};
nf_ct_iterate_cleanup(net, kill_report, &fr);
nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report);
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
......@@ -1386,7 +1361,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
nf_ct_iterate_cleanup(net, kill_all, NULL);
nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
nf_ct_release_dying_list(net);
if (atomic_read(&net->ct.count) != 0)
busy = 1;
......@@ -1692,7 +1667,7 @@ int nf_conntrack_init_net(struct net *net)
return ret;
}
s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq);
EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
......@@ -8,12 +8,8 @@
* published by the Free Software Foundation.
*/
#include <linux/ctype.h>
#include <linux/export.h>
#include <linux/jhash.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
......
......@@ -1038,21 +1038,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
}
}
if (del_timer(&ct->timeout)) {
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
NETLINK_CB(skb).portid,
nlmsg_report(nlh)) < 0) {
nf_ct_delete_from_lists(ct);
/* we failed to report the event, try later */
nf_ct_dying_timeout(ct);
nf_ct_put(ct);
return 0;
}
/* death_by_timeout would report the event again */
set_bit(IPS_DYING_BIT, &ct->status);
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
}
if (del_timer(&ct->timeout))
nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
nf_ct_put(ct);
return 0;
......@@ -1999,6 +1987,27 @@ ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb,
return err == -EAGAIN ? -ENOBUFS : err;
}
static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
[CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
[CTA_EXPECT_MASK] = { .type = NLA_NESTED },
[CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
[CTA_EXPECT_ID] = { .type = NLA_U32 },
[CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
[CTA_EXPECT_ZONE] = { .type = NLA_U16 },
[CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
[CTA_EXPECT_CLASS] = { .type = NLA_U32 },
[CTA_EXPECT_NAT] = { .type = NLA_NESTED },
[CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
};
static struct nf_conntrack_expect *
ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
struct nf_conntrack_helper *helper,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask);
#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
static size_t
ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
......@@ -2139,10 +2148,69 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
return ret;
}
static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
const struct nf_conn *ct,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask)
{
int err;
err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
nf_ct_l3num(ct));
if (err < 0)
return err;
return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
nf_ct_l3num(ct));
}
static int
ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
u32 portid, u32 report)
{
struct nlattr *cda[CTA_EXPECT_MAX+1];
struct nf_conntrack_tuple tuple, mask;
struct nf_conntrack_helper *helper = NULL; /* stays NULL unless CTA_EXPECT_HELP_NAME is given */
struct nf_conntrack_expect *exp;
int err;
err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy);
if (err < 0)
return err;
err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda,
ct, &tuple, &mask);
if (err < 0)
return err;
if (cda[CTA_EXPECT_HELP_NAME]) {
const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper == NULL)
return -EOPNOTSUPP;
}
exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
helper, &tuple, &mask);
if (IS_ERR(exp))
return PTR_ERR(exp);
err = nf_ct_expect_related_report(exp, portid, report);
if (err < 0) {
nf_ct_expect_put(exp);
return err;
}
return 0;
}
static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
.build_size = ctnetlink_nfqueue_build_size,
.build = ctnetlink_nfqueue_build,
.parse = ctnetlink_nfqueue_parse,
.attach_expect = ctnetlink_nfqueue_attach_expect,
};
#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */
......@@ -2510,21 +2578,6 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
return err;
}
static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
[CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
[CTA_EXPECT_MASK] = { .type = NLA_NESTED },
[CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
[CTA_EXPECT_ID] = { .type = NLA_U32 },
[CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
[CTA_EXPECT_ZONE] = { .type = NLA_U16 },
[CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
[CTA_EXPECT_CLASS] = { .type = NLA_U32 },
[CTA_EXPECT_NAT] = { .type = NLA_NESTED },
[CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
};
static int
ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
......@@ -2747,76 +2800,26 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
#endif
}
static int
ctnetlink_create_expect(struct net *net, u16 zone,
const struct nlattr * const cda[],
u_int8_t u3,
u32 portid, int report)
static struct nf_conntrack_expect *
ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
struct nf_conntrack_helper *helper,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask)
{
struct nf_conntrack_tuple tuple, mask, master_tuple;
struct nf_conntrack_tuple_hash *h = NULL;
u_int32_t class = 0;
struct nf_conntrack_expect *exp;
struct nf_conn *ct;
struct nf_conn_help *help;
struct nf_conntrack_helper *helper = NULL;
u_int32_t class = 0;
int err = 0;
/* caller guarantees that those three CTA_EXPECT_* exist */
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
if (err < 0)
return err;
err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
if (err < 0)
return err;
err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
if (err < 0)
return err;
/* Look for master conntrack of this expectation */
h = nf_conntrack_find_get(net, zone, &master_tuple);
if (!h)
return -ENOENT;
ct = nf_ct_tuplehash_to_ctrack(h);
/* Look for helper of this expectation */
if (cda[CTA_EXPECT_HELP_NAME]) {
const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper == NULL) {
#ifdef CONFIG_MODULES
if (request_module("nfct-helper-%s", helpname) < 0) {
err = -EOPNOTSUPP;
goto out;
}
helper = __nf_conntrack_helper_find(helpname,
nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper) {
err = -EAGAIN;
goto out;
}
#endif
err = -EOPNOTSUPP;
goto out;
}
}
int err;
if (cda[CTA_EXPECT_CLASS] && helper) {
class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS]));
if (class > helper->expect_class_max) {
err = -EINVAL;
goto out;
}
if (class > helper->expect_class_max)
return ERR_PTR(-EINVAL);
}
exp = nf_ct_expect_alloc(ct);
if (!exp) {
err = -ENOMEM;
goto out;
}
if (!exp)
return ERR_PTR(-ENOMEM);
help = nfct_help(ct);
if (!help) {
if (!cda[CTA_EXPECT_TIMEOUT]) {
......@@ -2854,21 +2857,89 @@ ctnetlink_create_expect(struct net *net, u16 zone,
exp->class = class;
exp->master = ct;
exp->helper = helper;
memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3));
exp->mask.src.u.all = mask.src.u.all;
exp->tuple = *tuple;
exp->mask.src.u3 = mask->src.u3;
exp->mask.src.u.all = mask->src.u.all;
if (cda[CTA_EXPECT_NAT]) {
err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT],
exp, u3);
exp, nf_ct_l3num(ct));
if (err < 0)
goto err_out;
}
err = nf_ct_expect_related_report(exp, portid, report);
return exp;
err_out:
nf_ct_expect_put(exp);
out:
nf_ct_put(nf_ct_tuplehash_to_ctrack(h));
return ERR_PTR(err);
}
static int
ctnetlink_create_expect(struct net *net, u16 zone,
const struct nlattr * const cda[],
u_int8_t u3, u32 portid, int report)
{
struct nf_conntrack_tuple tuple, mask, master_tuple;
struct nf_conntrack_tuple_hash *h = NULL;
struct nf_conntrack_helper *helper = NULL;
struct nf_conntrack_expect *exp;
struct nf_conn *ct;
int err;
/* caller guarantees that those three CTA_EXPECT_* exist */
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
if (err < 0)
return err;
err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
if (err < 0)
return err;
err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
if (err < 0)
return err;
/* Look for master conntrack of this expectation */
h = nf_conntrack_find_get(net, zone, &master_tuple);
if (!h)
return -ENOENT;
ct = nf_ct_tuplehash_to_ctrack(h);
if (cda[CTA_EXPECT_HELP_NAME]) {
const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
helper = __nf_conntrack_helper_find(helpname, u3,
nf_ct_protonum(ct));
if (helper == NULL) {
#ifdef CONFIG_MODULES
if (request_module("nfct-helper-%s", helpname) < 0) {
err = -EOPNOTSUPP;
goto err_ct;
}
helper = __nf_conntrack_helper_find(helpname, u3,
nf_ct_protonum(ct));
if (helper) {
err = -EAGAIN;
goto err_ct;
}
#endif
err = -EOPNOTSUPP;
goto err_ct;
}
}
exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
if (IS_ERR(exp)) {
err = PTR_ERR(exp);
goto err_ct;
}
err = nf_ct_expect_related_report(exp, portid, report);
if (err < 0)
goto err_exp;
return 0;
err_exp:
nf_ct_expect_put(exp);
err_ct:
nf_ct_put(ct);
return err;
}
......
......@@ -281,7 +281,7 @@ void nf_ct_l3proto_pernet_unregister(struct net *net,
nf_ct_l3proto_unregister_sysctl(net, proto);
/* Remove all conntrack entries for this protocol */
nf_ct_iterate_cleanup(net, kill_l3proto, proto);
nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
......@@ -476,7 +476,7 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
/* Remove all conntrack entries for this protocol */
nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
......
......@@ -496,7 +496,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
}
#ifdef CONFIG_NF_NAT_NEEDED
static inline s16 nat_offset(const struct nf_conn *ct,
static inline s32 nat_offset(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq)
{
......@@ -525,7 +525,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin;
s16 receiver_offset;
s32 receiver_offset;
bool res, in_recv_win;
/*
......
......@@ -497,7 +497,7 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
rtnl_lock();
for_each_net(net)
nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
rtnl_unlock();
}
......@@ -511,7 +511,7 @@ static void nf_nat_l3proto_clean(u8 l3proto)
rtnl_lock();
for_each_net(net)
nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
rtnl_unlock();
}
......@@ -749,7 +749,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
{
struct nf_nat_proto_clean clean = {};
nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean);
nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
synchronize_rcu();
nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
......
......@@ -30,8 +30,6 @@
pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
x->offset_before, x->offset_after, x->correction_pos);
static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
/* Setup TCP sequence correction given this change at this sequence */
static inline void
adjust_tcp_sequence(u32 seq,
......@@ -49,7 +47,7 @@ adjust_tcp_sequence(u32 seq,
pr_debug("adjust_tcp_sequence: Seq_offset before: ");
DUMP_OFFSET(this_way);
spin_lock_bh(&nf_nat_seqofs_lock);
spin_lock_bh(&ct->lock);
/* SYN adjust. If it's uninitialized, or this is after last
* correction, record it: we don't handle more than one
......@@ -61,31 +59,26 @@ adjust_tcp_sequence(u32 seq,
this_way->offset_before = this_way->offset_after;
this_way->offset_after += sizediff;
}
spin_unlock_bh(&nf_nat_seqofs_lock);
spin_unlock_bh(&ct->lock);
pr_debug("adjust_tcp_sequence: Seq_offset after: ");
DUMP_OFFSET(this_way);
}
/* Get the offset value, for conntrack */
s16 nf_nat_get_offset(const struct nf_conn *ct,
/* Get the offset value, for conntrack. Caller must have the conntrack locked */
s32 nf_nat_get_offset(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq)
{
struct nf_conn_nat *nat = nfct_nat(ct);
struct nf_nat_seq *this_way;
s16 offset;
if (!nat)
return 0;
this_way = &nat->seq[dir];
spin_lock_bh(&nf_nat_seqofs_lock);
offset = after(seq, this_way->correction_pos)
return after(seq, this_way->correction_pos)
? this_way->offset_after : this_way->offset_before;
spin_unlock_bh(&nf_nat_seqofs_lock);
return offset;
}
/* Frobs data inside this packet, which is linear. */
......@@ -143,7 +136,7 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
}
void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
__be32 seq, s16 off)
__be32 seq, s32 off)
{
if (!off)
return;
......@@ -370,9 +363,10 @@ nf_nat_seq_adjust(struct sk_buff *skb,
struct tcphdr *tcph;
int dir;
__be32 newseq, newack;
s16 seqoff, ackoff;
s32 seqoff, ackoff;
struct nf_conn_nat *nat = nfct_nat(ct);
struct nf_nat_seq *this_way, *other_way;
int res;
dir = CTINFO2DIR(ctinfo);
......@@ -383,6 +377,7 @@ nf_nat_seq_adjust(struct sk_buff *skb,
return 0;
tcph = (void *)skb->data + protoff;
spin_lock_bh(&ct->lock);
if (after(ntohl(tcph->seq), this_way->correction_pos))
seqoff = this_way->offset_after;
else
......@@ -407,7 +402,10 @@ nf_nat_seq_adjust(struct sk_buff *skb,
tcph->seq = newseq;
tcph->ack_seq = newack;
return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
res = nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
spin_unlock_bh(&ct->lock);
return res;
}
/* Setup NAT on this expected conntrack so it follows master. */
......
/*
* Transparent proxy support for Linux/iptables
*
* Copyright (c) 2006-2007 BalaBit IT Ltd.
* Author: Balazs Scheidler, Krisztian Kovacs
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#include <linux/module.h>
#include <linux/net.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <net/udp.h>
#include <net/netfilter/nf_tproxy_core.h>
static void
nf_tproxy_destructor(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
skb->sk = NULL;
skb->destructor = NULL;
if (sk)
sock_put(sk);
}
/* consumes sk */
void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
{
/* assigning tw sockets complicates things; most
* skb->sk->X checks would have to test sk->sk_state first */
if (sk->sk_state == TCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
return;
}
skb_orphan(skb);
skb->sk = sk;
skb->destructor = nf_tproxy_destructor;
}
EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
static int __init nf_tproxy_init(void)
{
pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n");
pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n");
return 0;
}
module_init(nf_tproxy_init);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Krisztian Kovacs");
MODULE_DESCRIPTION("Transparent proxy support core routines");
......@@ -862,6 +862,7 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
[NFQA_MARK] = { .type = NLA_U32 },
[NFQA_PAYLOAD] = { .type = NLA_UNSPEC },
[NFQA_CT] = { .type = NLA_UNSPEC },
[NFQA_EXP] = { .type = NLA_UNSPEC },
};
static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
......@@ -990,9 +991,14 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if (entry == NULL)
return -ENOENT;
rcu_read_lock();
if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK))
if (nfqa[NFQA_CT]) {
ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo);
if (ct && nfqa[NFQA_EXP]) {
nfqnl_attach_expect(ct, nfqa[NFQA_EXP],
NETLINK_CB(skb).portid,
nlmsg_report(nlh));
}
}
if (nfqa[NFQA_PAYLOAD]) {
u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
......@@ -1005,7 +1011,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if (ct)
nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff);
}
rcu_read_unlock();
if (nfqa[NFQA_MARK])
entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
......
......@@ -96,3 +96,18 @@ void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
if ((ct->status & IPS_NAT_MASK) && diff)
nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff);
}
int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
u32 portid, u32 report)
{
struct nfq_ct_hook *nfq_ct;
if (nf_ct_is_untracked(ct))
return 0;
nfq_ct = rcu_dereference(nfq_ct_hook);
if (nfq_ct == NULL)
return -EOPNOTSUPP;
return nfq_ct->attach_expect(attr, ct, portid, report);
}
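On the userspace side, the expectation travels as a nested NFQA_EXP attribute
inside the NFQNL_MSG_VERDICT message, laid out like ctnetlink's CTA_EXPECT_*
attributes. A minimal, hypothetical libmnl sketch (the helper name and the
omitted tuple contents are illustrative, not a complete sender):

	#include <arpa/inet.h>
	#include <libmnl/libmnl.h>
	#include <linux/netfilter/nfnetlink_queue.h>
	#include <linux/netfilter/nfnetlink_conntrack.h>

	/* Nest expectation attributes into a verdict message under
	 * construction; the kernel validates them against exp_nla_policy. */
	static void nfq_verdict_add_expect(struct nlmsghdr *nlh)
	{
		struct nlattr *exp, *nest;

		exp = mnl_attr_nest_start(nlh, NFQA_EXP);

		nest = mnl_attr_nest_start(nlh, CTA_EXPECT_TUPLE);
		/* ... nested CTA_TUPLE_IP/CTA_TUPLE_PROTO attributes ... */
		mnl_attr_nest_end(nlh, nest);

		nest = mnl_attr_nest_start(nlh, CTA_EXPECT_MASK);
		/* ... mask attributes, same layout as the tuple ... */
		mnl_attr_nest_end(nlh, nest);

		/* mandatory if the master conntrack has no helper attached */
		mnl_attr_put_u32(nlh, CTA_EXPECT_TIMEOUT, htonl(30));

		mnl_attr_nest_end(nlh, exp);
	}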
......@@ -15,7 +15,9 @@
#include <linux/ip.h>
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
#include <net/inet_sock.h>
#include <net/inet_hashtables.h>
#include <linux/inetdevice.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
......@@ -26,13 +28,18 @@
#define XT_TPROXY_HAVE_IPV6 1
#include <net/if_inet6.h>
#include <net/addrconf.h>
#include <net/inet6_hashtables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
#include <net/netfilter/nf_tproxy_core.h>
#include <linux/netfilter/xt_TPROXY.h>
enum nf_tproxy_lookup_t {
NFT_LOOKUP_LISTENER,
NFT_LOOKUP_ESTABLISHED,
};
static bool tproxy_sk_is_transparent(struct sock *sk)
{
if (sk->sk_state != TCP_TIME_WAIT) {
......@@ -68,6 +75,157 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
return laddr ? laddr : daddr;
}
/*
* This is used when the user wants to intercept a connection matching
* an explicit iptables rule. In this case the sockets are assumed
* matching in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple, it is returned, assuming the redirection
* already took place and we process a packet belonging to an
* established connection
*
* - match: if there's a listening socket matching the redirection
* (e.g. on-port & on-ip of the connection), it is returned,
* regardless if it was bound to 0.0.0.0 or an explicit
* address. The reasoning is that if there's an explicit rule, it
* does not really matter if the listener is bound to an interface
* or to 0. The user already stated that he wants redirection
* (since he added the rule).
*
* Please note that there's an overlap between what a TPROXY target
* and a socket match will match. Normally if you have both rules the
* "socket" match will be the first one, effectively all packets
* belonging to established connections going through that one.
*/
static inline struct sock *
nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
sk = inet_lookup_listener(net, &tcp_hashinfo,
saddr, sport,
daddr, dport,
in->ifindex);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
break;
case NFT_LOOKUP_ESTABLISHED:
sk = inet_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, dport,
in->ifindex);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NFT_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
return sk;
}
#ifdef XT_TPROXY_HAVE_IPV6
static inline struct sock *
nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
sk = inet6_lookup_listener(net, &tcp_hashinfo,
saddr, sport,
daddr, ntohs(dport),
in->ifindex);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
break;
case NFT_LOOKUP_ESTABLISHED:
sk = __inet6_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, ntohs(dport),
in->ifindex);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NFT_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
return sk;
}
#endif
/**
* tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
* @skb: The skb being processed.
......@@ -117,6 +275,15 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
return sk;
}
/* assign a socket to the skb -- consumes sk */
static void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
skb->sk = sk;
skb->destructor = sock_edemux;
}
static unsigned int
tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
u_int32_t mark_mask, u_int32_t mark_value)
......
......@@ -202,7 +202,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
return -EINVAL;
}
if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n");
pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
return -EINVAL;
}
if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
......
......@@ -19,12 +19,12 @@
#include <net/icmp.h>
#include <net/sock.h>
#include <net/inet_sock.h>
#include <net/netfilter/nf_tproxy_core.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#define XT_SOCKET_HAVE_IPV6 1
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/inet6_hashtables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
......@@ -101,6 +101,43 @@ extract_icmp4_fields(const struct sk_buff *skb,
return 0;
}
/* "socket" match based redirection (no specific rule)
* ===================================================
*
* There are connections with dynamic endpoints (e.g. FTP data
* connection) that the user is unable to add explicit rules
* for. These are taken care of by a generic "socket" rule. It is
* assumed that the proxy application is trusted to open such
* connections without explicit iptables rule (except of course the
* generic 'socket' rule). In this case the following sockets are
* matched in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple
*
* - match: if there's a non-zero bound listener (possibly with a
* non-local address) We don't accept zero-bound listeners, since
* then local services could intercept traffic going through the
* box.
*/
static struct sock *
xt_socket_get_sock_v4(struct net *net, const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
return __inet_lookup(net, &tcp_hashinfo,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
return udp4_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
}
return NULL;
}
static bool
socket_match(const struct sk_buff *skb, struct xt_action_param *par,
const struct xt_socket_mtinfo1 *info)
......@@ -156,9 +193,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
#endif
if (!sk)
sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
saddr, daddr, sport, dport,
par->in, NFT_LOOKUP_ANY);
par->in);
if (sk) {
bool wildcard;
bool transparent = true;
......@@ -265,6 +302,25 @@ extract_icmp6_fields(const struct sk_buff *skb,
return 0;
}
static struct sock *
xt_socket_get_sock_v6(struct net *net, const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
return inet6_lookup(net, &tcp_hashinfo,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
return udp6_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
}
return NULL;
}
static bool
socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
{
......@@ -302,9 +358,9 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
}
if (!sk)
sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
saddr, daddr, sport, dport,
par->in, NFT_LOOKUP_ANY);
par->in);
if (sk) {
bool wildcard;
bool transparent = true;
......