Commit 4cb160d0 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for your net-next tree:

1) Get rid of nf_sk_is_transparent(), use inet_sk_transparent() instead.
   From Máté Eckl.

2) Move shared tproxy infrastructure to nf_tproxy_ipv4 and nf_tproxy_ipv6.
   Also from Máté.

3) Add hashtable to speed up chain lookups by name, from Florian Westphal.

4) Patch series to add connlimit support reusing part of the
   nf_conncount infrastructure. This includes preparatory changes such
   as passing context to the object and expression destroy interfaces,
   garbage collection for expressions embedded into set elements, and
   the introduction of the destroy_clone interface for expressions.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 1ffdd8e1 1b2470e5
...@@ -13,4 +13,15 @@ unsigned int nf_conncount_count(struct net *net, ...@@ -13,4 +13,15 @@ unsigned int nf_conncount_count(struct net *net,
const u32 *key, const u32 *key,
const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone); const struct nf_conntrack_zone *zone);
unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit);
bool nf_conncount_add(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple);
void nf_conncount_cache_free(struct hlist_head *hhead);
#endif #endif
...@@ -3,19 +3,6 @@ ...@@ -3,19 +3,6 @@
#define _NF_SOCK_H_ #define _NF_SOCK_H_
#include <net/sock.h> #include <net/sock.h>
#include <net/inet_timewait_sock.h>
static inline bool nf_sk_is_transparent(struct sock *sk)
{
switch (sk->sk_state) {
case TCP_TIME_WAIT:
return inet_twsk(sk)->tw_transparent;
case TCP_NEW_SYN_RECV:
return inet_rsk(inet_reqsk(sk))->no_srccheck;
default:
return inet_sk(sk)->transparent;
}
}
struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
const struct net_device *indev); const struct net_device *indev);
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/netfilter/x_tables.h> #include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nf_tables.h> #include <linux/netfilter/nf_tables.h>
#include <linux/u64_stats_sync.h> #include <linux/u64_stats_sync.h>
#include <linux/rhashtable.h>
#include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_flow_table.h>
#include <net/netlink.h> #include <net/netlink.h>
...@@ -342,6 +343,7 @@ struct nft_set_ops { ...@@ -342,6 +343,7 @@ struct nft_set_ops {
const struct nft_set_desc *desc, const struct nft_set_desc *desc,
const struct nlattr * const nla[]); const struct nlattr * const nla[]);
void (*destroy)(const struct nft_set *set); void (*destroy)(const struct nft_set *set);
void (*gc_init)(const struct nft_set *set);
unsigned int elemsize; unsigned int elemsize;
}; };
...@@ -370,6 +372,8 @@ void nft_unregister_set(struct nft_set_type *type); ...@@ -370,6 +372,8 @@ void nft_unregister_set(struct nft_set_type *type);
* *
* @list: table set list node * @list: table set list node
* @bindings: list of set bindings * @bindings: list of set bindings
* @table: table this set belongs to
* @net: netnamespace this set belongs to
* @name: name of the set * @name: name of the set
* @handle: unique handle of the set * @handle: unique handle of the set
* @ktype: key type (numeric type defined by userspace, not used in the kernel) * @ktype: key type (numeric type defined by userspace, not used in the kernel)
...@@ -393,6 +397,8 @@ void nft_unregister_set(struct nft_set_type *type); ...@@ -393,6 +397,8 @@ void nft_unregister_set(struct nft_set_type *type);
struct nft_set { struct nft_set {
struct list_head list; struct list_head list;
struct list_head bindings; struct list_head bindings;
struct nft_table *table;
possible_net_t net;
char *name; char *name;
u64 handle; u64 handle;
u32 ktype; u32 ktype;
...@@ -708,6 +714,7 @@ struct nft_expr_type { ...@@ -708,6 +714,7 @@ struct nft_expr_type {
}; };
#define NFT_EXPR_STATEFUL 0x1 #define NFT_EXPR_STATEFUL 0x1
#define NFT_EXPR_GC 0x2
/** /**
* struct nft_expr_ops - nf_tables expression operations * struct nft_expr_ops - nf_tables expression operations
...@@ -739,11 +746,15 @@ struct nft_expr_ops { ...@@ -739,11 +746,15 @@ struct nft_expr_ops {
const struct nft_expr *expr); const struct nft_expr *expr);
void (*destroy)(const struct nft_ctx *ctx, void (*destroy)(const struct nft_ctx *ctx,
const struct nft_expr *expr); const struct nft_expr *expr);
void (*destroy_clone)(const struct nft_ctx *ctx,
const struct nft_expr *expr);
int (*dump)(struct sk_buff *skb, int (*dump)(struct sk_buff *skb,
const struct nft_expr *expr); const struct nft_expr *expr);
int (*validate)(const struct nft_ctx *ctx, int (*validate)(const struct nft_ctx *ctx,
const struct nft_expr *expr, const struct nft_expr *expr,
const struct nft_data **data); const struct nft_data **data);
bool (*gc)(struct net *net,
const struct nft_expr *expr);
const struct nft_expr_type *type; const struct nft_expr_type *type;
void *data; void *data;
}; };
...@@ -850,6 +861,7 @@ enum nft_chain_flags { ...@@ -850,6 +861,7 @@ enum nft_chain_flags {
* *
* @rules: list of rules in the chain * @rules: list of rules in the chain
* @list: used internally * @list: used internally
* @rhlhead: used internally
* @table: table that this chain belongs to * @table: table that this chain belongs to
* @handle: chain handle * @handle: chain handle
* @use: number of jump references to this chain * @use: number of jump references to this chain
...@@ -862,6 +874,7 @@ struct nft_chain { ...@@ -862,6 +874,7 @@ struct nft_chain {
struct nft_rule *__rcu *rules_gen_1; struct nft_rule *__rcu *rules_gen_1;
struct list_head rules; struct list_head rules;
struct list_head list; struct list_head list;
struct rhlist_head rhlhead;
struct nft_table *table; struct nft_table *table;
u64 handle; u64 handle;
u32 use; u32 use;
...@@ -955,7 +968,8 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); ...@@ -955,7 +968,8 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
* struct nft_table - nf_tables table * struct nft_table - nf_tables table
* *
* @list: used internally * @list: used internally
* @chains: chains in the table * @chains_ht: chains in the table
* @chains: same, for stable walks
* @sets: sets in the table * @sets: sets in the table
* @objects: stateful objects in the table * @objects: stateful objects in the table
* @flowtables: flow tables in the table * @flowtables: flow tables in the table
...@@ -969,6 +983,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); ...@@ -969,6 +983,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
*/ */
struct nft_table { struct nft_table {
struct list_head list; struct list_head list;
struct rhltable chains_ht;
struct list_head chains; struct list_head chains;
struct list_head sets; struct list_head sets;
struct list_head objects; struct list_head objects;
...@@ -1070,7 +1085,8 @@ struct nft_object_ops { ...@@ -1070,7 +1085,8 @@ struct nft_object_ops {
int (*init)(const struct nft_ctx *ctx, int (*init)(const struct nft_ctx *ctx,
const struct nlattr *const tb[], const struct nlattr *const tb[],
struct nft_object *obj); struct nft_object *obj);
void (*destroy)(struct nft_object *obj); void (*destroy)(const struct nft_ctx *ctx,
struct nft_object *obj);
int (*dump)(struct sk_buff *skb, int (*dump)(struct sk_buff *skb,
struct nft_object *obj, struct nft_object *obj,
bool reset); bool reset);
......
#ifndef _NF_TPROXY_H_
#define _NF_TPROXY_H_
#include <net/tcp.h>
/*
 * Kind of socket lookup requested from nf_tproxy_get_sock_v4() and
 * nf_tproxy_get_sock_v6().
 */
enum nf_tproxy_lookup_t {
/* For TCP the listener hash is searched; for UDP connected sockets are rejected. */
NF_TPROXY_LOOKUP_LISTENER,
/*
 * For TCP the established hash is searched; for UDP only connected sockets
 * not bound to the wildcard address are accepted.
 */
NF_TPROXY_LOOKUP_ESTABLISHED,
};
/*
 * Report whether @sk is a transparent socket according to
 * inet_sk_transparent().
 *
 * When the socket is NOT transparent the reference is released here with
 * sock_gen_put(), so the caller must not touch @sk after a false return.
 * On a true return the caller still owns the socket reference.
 */
static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
{
	const bool transparent = inet_sk_transparent(sk);

	if (!transparent)
		sock_gen_put(sk);

	return transparent;
}
__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
/**
* nf_tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
* @net: Network namespace.
* @skb: The skb being processed.
* @laddr: IPv4 address to redirect to or zero.
* @lport: TCP port to redirect to or zero.
* @sk: The TIME_WAIT TCP socket found by the lookup.
*
* We have to handle SYN packets arriving to TIME_WAIT sockets
* differently: instead of reopening the connection we should rather
* redirect the new connection to the proxy if there's a listener
* socket present.
*
* nf_tproxy_handle_time_wait4() consumes the socket reference passed in.
*
* Returns the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
struct sock *
nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
__be32 laddr, __be16 lport, struct sock *sk);
/*
* This is used when the user wants to intercept a connection matching
* an explicit iptables rule. In this case the sockets are assumed
* matching in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple, it is returned, assuming the redirection
* already took place and we process a packet belonging to an
* established connection
*
* - match: if there's a listening socket matching the redirection
* (e.g. on-port & on-ip of the connection), it is returned,
* regardless if it was bound to 0.0.0.0 or an explicit
* address. The reasoning is that if there's an explicit rule, it
* does not really matter if the listener is bound to an interface
* or to 0. The user already stated that he wants redirection
* (since he added the rule).
*
* Please note that there's an overlap between what a TPROXY target
* and a socket match will match. Normally if you have both rules the
* "socket" match will be the first one, effectively all packets
* belonging to established connections going through that one.
*/
struct sock *
nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type);
const struct in6_addr *
nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
const struct in6_addr *daddr);
/**
* nf_tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections
* @skb: The skb being processed.
* @tproto: Transport protocol.
* @thoff: Transport protocol header offset.
* @net: Network namespace.
* @laddr: IPv6 address to redirect to.
* @lport: TCP port to redirect to or zero.
* @sk: The TIME_WAIT TCP socket found by the lookup.
*
* We have to handle SYN packets arriving to TIME_WAIT sockets
* differently: instead of reopening the connection we should rather
* redirect the new connection to the proxy if there's a listener
* socket present.
*
* nf_tproxy_handle_time_wait6() consumes the socket reference passed in.
*
* Returns the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
struct sock *
nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
struct net *net,
const struct in6_addr *laddr,
const __be16 lport,
struct sock *sk);
struct sock *
nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type);
#endif /* _NF_TPROXY_H_ */
...@@ -1043,6 +1043,24 @@ enum nft_limit_attributes { ...@@ -1043,6 +1043,24 @@ enum nft_limit_attributes {
}; };
#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) #define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1)
enum nft_connlimit_flags {
NFT_CONNLIMIT_F_INV = (1 << 0),
};
/**
* enum nft_connlimit_attributes - nf_tables connlimit expression netlink attributes
*
* @NFTA_CONNLIMIT_COUNT: number of connections (NLA_U32)
* @NFTA_CONNLIMIT_FLAGS: flags (NLA_U32: enum nft_connlimit_flags)
*/
enum nft_connlimit_attributes {
NFTA_CONNLIMIT_UNSPEC,
NFTA_CONNLIMIT_COUNT,
NFTA_CONNLIMIT_FLAGS,
__NFTA_CONNLIMIT_MAX
};
#define NFTA_CONNLIMIT_MAX (__NFTA_CONNLIMIT_MAX - 1)
/** /**
* enum nft_counter_attributes - nf_tables counter expression netlink attributes * enum nft_counter_attributes - nf_tables counter expression netlink attributes
* *
...@@ -1357,7 +1375,8 @@ enum nft_ct_helper_attributes { ...@@ -1357,7 +1375,8 @@ enum nft_ct_helper_attributes {
#define NFT_OBJECT_QUOTA 2 #define NFT_OBJECT_QUOTA 2
#define NFT_OBJECT_CT_HELPER 3 #define NFT_OBJECT_CT_HELPER 3
#define NFT_OBJECT_LIMIT 4 #define NFT_OBJECT_LIMIT 4
#define __NFT_OBJECT_MAX 5 #define NFT_OBJECT_CONNLIMIT 5
#define __NFT_OBJECT_MAX 6
#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) #define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1)
/** /**
......
...@@ -29,7 +29,10 @@ config NF_SOCKET_IPV4 ...@@ -29,7 +29,10 @@ config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support" tristate "IPv4 socket lookup support"
help help
This option enables the IPv4 socket lookup infrastructure. This is This option enables the IPv4 socket lookup infrastructure. This is
is required by the iptables socket match. is required by the {ip,nf}tables socket match.
config NF_TPROXY_IPV4
tristate "IPv4 tproxy support"
if NF_TABLES if NF_TABLES
......
...@@ -17,6 +17,7 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o ...@@ -17,6 +17,7 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o
obj-$(CONFIG_NF_TPROXY_IPV4) += nf_tproxy_ipv4.o
# logging # logging
obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
......
/*
* Copyright (C) 2007-2008 BalaBit IT Ltd.
* Author: Krisztian Kovacs
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#include <net/netfilter/nf_tproxy.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/inet_sock.h>
#include <linux/ip.h>
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
#include <linux/inetdevice.h>
/*
 * Decide what to do with a packet that matched the TIME_WAIT socket @sk.
 *
 * If the TCP header cannot be read, the TIME_WAIT reference is dropped and
 * NULL is returned.  For anything other than a bare SYN the TIME_WAIT socket
 * is handed back untouched.  For a bare SYN a listener is looked up on
 * (@laddr or the packet's daddr, @lport or the packet's dport); when one is
 * found the TIME_WAIT socket is descheduled and released and the listener is
 * returned in its place.
 */
struct sock *
nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
			    __be32 laddr, __be16 lport, struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr tcp_buf, *th;
	struct sock *listener;
	__be32 lookup_daddr;
	__be16 lookup_dport;

	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(tcp_buf), &tcp_buf);
	if (!th) {
		inet_twsk_put(inet_twsk(sk));
		return NULL;
	}

	/* Only a SYN without RST/ACK/FIN is a candidate for redirection. */
	if (!th->syn || th->rst || th->ack || th->fin)
		return sk;

	/* Zero means "not overridden": fall back to the packet's own values. */
	lookup_daddr = laddr ? laddr : iph->daddr;
	lookup_dport = lport ? lport : th->dest;

	listener = nf_tproxy_get_sock_v4(net, skb, th, iph->protocol,
					 iph->saddr, lookup_daddr,
					 th->source, lookup_dport,
					 skb->dev, NF_TPROXY_LOOKUP_LISTENER);
	if (!listener)
		return sk;

	/* A listener takes over: retire the TIME_WAIT socket. */
	inet_twsk_deschedule_put(inet_twsk(sk));
	return listener;
}
EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait4);
/*
 * Pick the local IPv4 address a connection should be redirected to.
 *
 * @skb:        packet being processed; its ->dev selects the interface.
 * @user_laddr: address configured by the user, or zero if none was given.
 * @daddr:      fallback address, returned when nothing better is found.
 *
 * Returns @user_laddr when it is non-zero.  Otherwise returns the first
 * primary address configured on skb->dev, or @daddr when the device has no
 * IPv4 configuration or no primary address.
 */
__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
{
	struct in_device *indev;
	__be32 laddr;

	if (user_laddr)
		return user_laddr;

	/*
	 * __in_dev_get_rcu() yields NULL when the device carries no IPv4
	 * state; for_primary_ifa() would dereference it unconditionally.
	 */
	indev = __in_dev_get_rcu(skb->dev);
	if (!indev)
		return daddr;

	laddr = 0;
	for_primary_ifa(indev) {
		laddr = ifa->ifa_local;
		break;
	} endfor_ifa(indev);

	return laddr ? laddr : daddr;
}
EXPORT_SYMBOL_GPL(nf_tproxy_laddr4);
/*
 * nf_tproxy_get_sock_v4 - look up the IPv4 socket matching a tuple
 *
 * @hp is only used on the TCP listener path, where it is treated as the
 * TCP header to compute the header length passed to inet_lookup_listener().
 * @in supplies the interface index used by every lookup.
 *
 * TCP, NF_TPROXY_LOOKUP_LISTENER: inet_lookup_listener() on tcp_hashinfo,
 * then a reference is taken with refcount_inc_not_zero(); if that fails the
 * result is NULL.
 * TCP, NF_TPROXY_LOOKUP_ESTABLISHED: inet_lookup_established() on
 * tcp_hashinfo.  Any other lookup type hits BUG().
 * UDP: udp4_lib_lookup(); the socket is released with sock_put() and NULL is
 * returned when its connected/wildcard state does not fit @lookup_type.
 * Any other protocol triggers WARN_ON(1) and yields NULL.
 *
 * Returns the socket found, or NULL.
 * NOTE(review): the established and UDP lookups presumably return a
 * referenced socket (the UDP mismatch path drops one) - confirm against the
 * lookup helpers.
 */
struct sock *
nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
tcph = hp;
sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
ip_hdrlen(skb) +
__tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
in->ifindex, 0);
/* Take our own reference; give up if the listener is already going away. */
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
break;
case NF_TPROXY_LOOKUP_ESTABLISHED:
sk = inet_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, dport,
in->ifindex);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
/* "connected" is judged by sk_state, "wildcard" by a zero bound receive address. */
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
/*
 * Reject the socket when it does not fit the requested lookup type:
 * an "established" lookup needs a connected, non-wildcard socket,
 * a "listener" lookup needs an unconnected one.
 */
if ((lookup_type == NF_TPROXY_LOOKUP_ESTABLISHED &&
(!connected || wildcard)) ||
(lookup_type == NF_TPROXY_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
/* Only TCP and UDP are handled here. */
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
return sk;
}
EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support");
...@@ -29,7 +29,10 @@ config NF_SOCKET_IPV6 ...@@ -29,7 +29,10 @@ config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support" tristate "IPv6 socket lookup support"
help help
This option enables the IPv6 socket lookup infrastructure. This This option enables the IPv6 socket lookup infrastructure. This
is used by the ip6tables socket match. is used by the {ip6,nf}tables socket match.
config NF_TPROXY_IPV6
tristate "IPv6 tproxy support"
if NF_TABLES if NF_TABLES
......
...@@ -26,6 +26,7 @@ nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o ...@@ -26,6 +26,7 @@ nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o
obj-$(CONFIG_NF_TPROXY_IPV6) += nf_tproxy_ipv6.o
# logging # logging
obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
......
#include <net/netfilter/nf_tproxy.h>
#include <linux/module.h>
#include <net/inet6_hashtables.h>
#include <net/addrconf.h>
#include <net/udp.h>
#include <net/tcp.h>
/*
 * Pick the local IPv6 address a connection should be redirected to.
 *
 * A user supplied address that is not the unspecified address wins.
 * Otherwise the address list of skb->dev is walked under the device's
 * read lock and the first address that is neither tentative nor
 * deprecated is chosen.  If the device has no IPv6 state or no such
 * address, @daddr is returned.
 *
 * NOTE(review): when an interface address is chosen, the returned pointer
 * refers to ifa->addr and is used after the lock has been released -
 * confirm callers run in a context that keeps the ifaddr alive.
 */
const struct in6_addr *
nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
		 const struct in6_addr *daddr)
{
	const struct in6_addr *chosen = daddr;
	struct inet6_ifaddr *ifa;
	struct inet6_dev *idev;

	if (!ipv6_addr_any(user_laddr))
		return user_laddr;

	idev = __in6_dev_get(skb->dev);
	if (!idev)
		return daddr;

	read_lock_bh(&idev->lock);
	list_for_each_entry(ifa, &idev->addr_list, if_list) {
		if (!(ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))) {
			chosen = &ifa->addr;
			break;
		}
	}
	read_unlock_bh(&idev->lock);

	return chosen;
}
EXPORT_SYMBOL_GPL(nf_tproxy_laddr6);
struct sock *
nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
struct net *net,
const struct in6_addr *laddr,
const __be16 lport,
struct sock *sk)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr _hdr, *hp;
hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
if (hp == NULL) {
inet_twsk_put(inet_twsk(sk));
return NULL;
}
if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
/* SYN to a TIME_WAIT socket, we'd rather redirect it
* to a listener socket if there's one */
struct sock *sk2;
sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, hp, tproto,
&iph->saddr,
nf_tproxy_laddr6(skb, laddr, &iph->daddr),
hp->source,
lport ? lport : hp->dest,
skb->dev, NF_TPROXY_LOOKUP_LISTENER);
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
}
}
return sk;
}
EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait6);
/*
 * nf_tproxy_get_sock_v6 - look up the IPv6 socket matching a tuple
 *
 * @thoff is the transport header offset and @hp the TCP header; both are
 * only used on the TCP listener path to compute the offset passed to
 * inet6_lookup_listener().  @in supplies the interface index used by every
 * lookup.
 *
 * TCP, NF_TPROXY_LOOKUP_LISTENER: inet6_lookup_listener() on tcp_hashinfo,
 * then a reference is taken with refcount_inc_not_zero(); if that fails the
 * result is NULL.
 * TCP, NF_TPROXY_LOOKUP_ESTABLISHED: __inet6_lookup_established() on
 * tcp_hashinfo.  Any other lookup type hits BUG().
 * UDP: udp6_lib_lookup(); the socket is released with sock_put() and NULL is
 * returned when its connected/wildcard state does not fit @lookup_type.
 * Any other protocol triggers WARN_ON(1) and yields NULL.
 *
 * Both TCP lookups are given the destination port converted with ntohs(),
 * while the UDP lookup receives @dport unchanged.
 *
 * Returns the socket found, or NULL.
 * NOTE(review): whether the established TCP lookup returns a referenced
 * socket cannot be told from this file - confirm against
 * __inet6_lookup_established().
 */
struct sock *
nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
tcph = hp;
sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
in->ifindex, 0);
/* Take our own reference; give up if the listener is already going away. */
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
break;
case NF_TPROXY_LOOKUP_ESTABLISHED:
sk = __inet6_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, ntohs(dport),
in->ifindex, 0);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
/* "connected" is judged by sk_state, "wildcard" by an unspecified bound receive address. */
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
/*
 * Reject the socket when it does not fit the requested lookup type:
 * an "established" lookup needs a connected, non-wildcard socket,
 * a "listener" lookup needs an unconnected one.
 */
if ((lookup_type == NF_TPROXY_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NF_TPROXY_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
/* Only TCP and UDP are handled here. */
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
return sk;
}
EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v6);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
/* This module provides the IPv6 helpers (nf_tproxy_*6); describe it as such. */
MODULE_DESCRIPTION("Netfilter IPv6 transparent proxy support");
...@@ -517,6 +517,15 @@ config NFT_COUNTER ...@@ -517,6 +517,15 @@ config NFT_COUNTER
This option adds the "counter" expression that you can use to This option adds the "counter" expression that you can use to
include packet and byte counters in a rule. include packet and byte counters in a rule.
config NFT_CONNLIMIT
tristate "Netfilter nf_tables connlimit module"
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NETFILTER_CONNCOUNT
help
This option adds the "connlimit" expression that you can use to
rate-limit rule matches per connection.
config NFT_LOG config NFT_LOG
tristate "Netfilter nf_tables log module" tristate "Netfilter nf_tables log module"
help help
...@@ -989,6 +998,8 @@ config NETFILTER_XT_TARGET_TPROXY ...@@ -989,6 +998,8 @@ config NETFILTER_XT_TARGET_TPROXY
depends on IP_NF_MANGLE depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
select NF_TPROXY_IPV4
select NF_TPROXY_IPV6 if IP6_NF_IPTABLES
help help
This option adds a `TPROXY' target, which is somewhat similar to This option adds a `TPROXY' target, which is somewhat similar to
REDIRECT. It can only be used in the mangle table and is useful REDIRECT. It can only be used in the mangle table and is useful
......
...@@ -80,6 +80,7 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ ...@@ -80,6 +80,7 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
obj-$(CONFIG_NF_TABLES) += nf_tables.o obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
......
...@@ -79,7 +79,7 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen) ...@@ -79,7 +79,7 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32)); return memcmp(a, b, klen * sizeof(u32));
} }
static bool add_hlist(struct hlist_head *head, bool nf_conncount_add(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple) const struct nf_conntrack_tuple *tuple)
{ {
struct nf_conncount_tuple *conn; struct nf_conncount_tuple *conn;
...@@ -91,9 +91,9 @@ static bool add_hlist(struct hlist_head *head, ...@@ -91,9 +91,9 @@ static bool add_hlist(struct hlist_head *head,
hlist_add_head(&conn->node, head); hlist_add_head(&conn->node, head);
return true; return true;
} }
EXPORT_SYMBOL_GPL(nf_conncount_add);
static unsigned int check_hlist(struct net *net, unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
struct hlist_head *head,
const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone, const struct nf_conntrack_zone *zone,
bool *addit) bool *addit)
...@@ -141,6 +141,7 @@ static unsigned int check_hlist(struct net *net, ...@@ -141,6 +141,7 @@ static unsigned int check_hlist(struct net *net,
return length; return length;
} }
EXPORT_SYMBOL_GPL(nf_conncount_lookup);
static void tree_nodes_free(struct rb_root *root, static void tree_nodes_free(struct rb_root *root,
struct nf_conncount_rb *gc_nodes[], struct nf_conncount_rb *gc_nodes[],
...@@ -187,13 +188,15 @@ count_tree(struct net *net, struct rb_root *root, ...@@ -187,13 +188,15 @@ count_tree(struct net *net, struct rb_root *root,
} else { } else {
/* same source network -> be counted! */ /* same source network -> be counted! */
unsigned int count; unsigned int count;
count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
count = nf_conncount_lookup(net, &rbconn->hhead, tuple,
zone, &addit);
tree_nodes_free(root, gc_nodes, gc_count); tree_nodes_free(root, gc_nodes, gc_count);
if (!addit) if (!addit)
return count; return count;
if (!add_hlist(&rbconn->hhead, tuple)) if (!nf_conncount_add(&rbconn->hhead, tuple))
return 0; /* hotdrop */ return 0; /* hotdrop */
return count + 1; return count + 1;
...@@ -203,7 +206,7 @@ count_tree(struct net *net, struct rb_root *root, ...@@ -203,7 +206,7 @@ count_tree(struct net *net, struct rb_root *root,
continue; continue;
/* only used for GC on hhead, retval and 'addit' ignored */ /* only used for GC on hhead, retval and 'addit' ignored */
check_hlist(net, &rbconn->hhead, tuple, zone, &addit); nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit);
if (hlist_empty(&rbconn->hhead)) if (hlist_empty(&rbconn->hhead))
gc_nodes[gc_count++] = rbconn; gc_nodes[gc_count++] = rbconn;
} }
...@@ -303,11 +306,19 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family ...@@ -303,11 +306,19 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
} }
EXPORT_SYMBOL_GPL(nf_conncount_init); EXPORT_SYMBOL_GPL(nf_conncount_init);
static void destroy_tree(struct rb_root *r) void nf_conncount_cache_free(struct hlist_head *hhead)
{ {
struct nf_conncount_tuple *conn; struct nf_conncount_tuple *conn;
struct nf_conncount_rb *rbconn;
struct hlist_node *n; struct hlist_node *n;
hlist_for_each_entry_safe(conn, n, hhead, node)
kmem_cache_free(conncount_conn_cachep, conn);
}
EXPORT_SYMBOL_GPL(nf_conncount_cache_free);
static void destroy_tree(struct rb_root *r)
{
struct nf_conncount_rb *rbconn;
struct rb_node *node; struct rb_node *node;
while ((node = rb_first(r)) != NULL) { while ((node = rb_first(r)) != NULL) {
...@@ -315,8 +326,7 @@ static void destroy_tree(struct rb_root *r) ...@@ -315,8 +326,7 @@ static void destroy_tree(struct rb_root *r)
rb_erase(node, r); rb_erase(node, r);
hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) nf_conncount_cache_free(&rbconn->hhead);
kmem_cache_free(conncount_conn_cachep, conn);
kmem_cache_free(conncount_rb_cachep, rbconn); kmem_cache_free(conncount_rb_cachep, rbconn);
} }
......
...@@ -34,6 +34,20 @@ enum { ...@@ -34,6 +34,20 @@ enum {
NFT_VALIDATE_DO, NFT_VALIDATE_DO,
}; };
static u32 nft_chain_hash(const void *data, u32 len, u32 seed);
static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed);
static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *);
/*
 * rhashtable parameters for the chain hash table: entries are struct
 * nft_chain objects linked through ->rhlhead and keyed by ->name, hashed
 * and compared with the nft_chain_hash*() helpers declared above.
 */
static const struct rhashtable_params nft_chain_ht_params = {
.head_offset = offsetof(struct nft_chain, rhlhead),
.key_offset = offsetof(struct nft_chain, name),
.hashfn = nft_chain_hash,
.obj_hashfn = nft_chain_hash_obj,
.obj_cmpfn = nft_chain_hash_cmp,
.locks_mul = 1,
.automatic_shrinking = true,
};
static void nft_validate_state_update(struct net *net, u8 new_validate_state) static void nft_validate_state_update(struct net *net, u8 new_validate_state)
{ {
switch (net->nft.validate_state) { switch (net->nft.validate_state) {
...@@ -720,6 +734,29 @@ static int nf_tables_updtable(struct nft_ctx *ctx) ...@@ -720,6 +734,29 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
return ret; return ret;
} }
/*
 * Key hash callback: @data is a NUL-terminated chain name.  The whole
 * string (without the terminator) is hashed with jhash(); @len is unused.
 */
static u32 nft_chain_hash(const void *data, u32 len, u32 seed)
{
	const char *chain_name = data;
	const u32 name_len = strlen(chain_name);

	return jhash(chain_name, name_len, seed);
}
/*
 * Object hash callback: @data is a struct nft_chain; hash its name exactly
 * as nft_chain_hash() hashes a lookup key.  @len is unused.
 */
static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct nft_chain *obj = data;
	const char *chain_name = obj->name;

	return nft_chain_hash(chain_name, 0, seed);
}
/*
 * Compare callback: returns the strcmp() result of the stored chain's name
 * against the lookup key in @arg, i.e. zero when the names are equal.
 */
static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg,
			      const void *ptr)
{
	const char *wanted = arg->key;
	const struct nft_chain *candidate = ptr;

	return strcmp(candidate->name, wanted);
}
static int nf_tables_newtable(struct net *net, struct sock *nlsk, static int nf_tables_newtable(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh, struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[], const struct nlattr * const nla[],
...@@ -766,6 +803,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, ...@@ -766,6 +803,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (table->name == NULL) if (table->name == NULL)
goto err_strdup; goto err_strdup;
err = rhltable_init(&table->chains_ht, &nft_chain_ht_params);
if (err)
goto err_chain_ht;
INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets); INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects); INIT_LIST_HEAD(&table->objects);
...@@ -782,6 +823,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, ...@@ -782,6 +823,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
list_add_tail_rcu(&table->list, &net->nft.tables); list_add_tail_rcu(&table->list, &net->nft.tables);
return 0; return 0;
err_trans: err_trans:
rhltable_destroy(&table->chains_ht);
err_chain_ht:
kfree(table->name); kfree(table->name);
err_strdup: err_strdup:
kfree(table); kfree(table);
...@@ -922,6 +965,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) ...@@ -922,6 +965,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
{ {
BUG_ON(ctx->table->use > 0); BUG_ON(ctx->table->use > 0);
rhltable_destroy(&ctx->table->chains_ht);
kfree(ctx->table->name); kfree(ctx->table->name);
kfree(ctx->table); kfree(ctx->table);
} }
...@@ -967,21 +1011,35 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask) ...@@ -967,21 +1011,35 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
} }
static struct nft_chain *nft_chain_lookup(const struct nft_table *table, static struct nft_chain *nft_chain_lookup(struct nft_table *table,
const struct nlattr *nla, u8 genmask) const struct nlattr *nla, u8 genmask)
{ {
char search[NFT_CHAIN_MAXNAMELEN + 1];
struct rhlist_head *tmp, *list;
struct nft_chain *chain; struct nft_chain *chain;
if (nla == NULL) if (nla == NULL)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
list_for_each_entry_rcu(chain, &table->chains, list) { nla_strlcpy(search, nla, sizeof(search));
if (!nla_strcmp(nla, chain->name) &&
nft_active_genmask(chain, genmask))
return chain;
}
return ERR_PTR(-ENOENT); WARN_ON(!rcu_read_lock_held() &&
!lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
chain = ERR_PTR(-ENOENT);
rcu_read_lock();
list = rhltable_lookup(&table->chains_ht, search, nft_chain_ht_params);
if (!list)
goto out_unlock;
rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) {
if (nft_active_genmask(chain, genmask))
goto out_unlock;
}
chain = ERR_PTR(-ENOENT);
out_unlock:
rcu_read_unlock();
return chain;
} }
static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
...@@ -1185,8 +1243,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, ...@@ -1185,8 +1243,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
{ {
const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_cur(net);
const struct nft_table *table;
const struct nft_chain *chain; const struct nft_chain *chain;
struct nft_table *table;
struct sk_buff *skb2; struct sk_buff *skb2;
int family = nfmsg->nfgen_family; int family = nfmsg->nfgen_family;
int err; int err;
...@@ -1504,9 +1562,17 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, ...@@ -1504,9 +1562,17 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (err < 0) if (err < 0)
goto err1; goto err1;
err = rhltable_insert_key(&table->chains_ht, chain->name,
&chain->rhlhead, nft_chain_ht_params);
if (err)
goto err2;
err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (err < 0) if (err < 0) {
rhltable_remove(&table->chains_ht, &chain->rhlhead,
nft_chain_ht_params);
goto err2; goto err2;
}
table->use++; table->use++;
list_add_tail_rcu(&chain->list, &table->chains); list_add_tail_rcu(&chain->list, &table->chains);
...@@ -2206,9 +2272,9 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, ...@@ -2206,9 +2272,9 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
{ {
const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_cur(net);
const struct nft_table *table;
const struct nft_chain *chain; const struct nft_chain *chain;
const struct nft_rule *rule; const struct nft_rule *rule;
struct nft_table *table;
struct sk_buff *skb2; struct sk_buff *skb2;
int family = nfmsg->nfgen_family; int family = nfmsg->nfgen_family;
int err; int err;
...@@ -3359,6 +3425,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, ...@@ -3359,6 +3425,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
} }
INIT_LIST_HEAD(&set->bindings); INIT_LIST_HEAD(&set->bindings);
set->table = table;
write_pnet(&set->net, net);
set->ops = ops; set->ops = ops;
set->ktype = ktype; set->ktype = ktype;
set->klen = desc.klen; set->klen = desc.klen;
...@@ -4036,12 +4104,24 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, ...@@ -4036,12 +4104,24 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr) bool destroy_expr)
{ {
struct nft_set_ext *ext = nft_set_elem_ext(set, elem); struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
struct nft_ctx ctx = {
.net = read_pnet(&set->net),
.family = set->table->family,
};
nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE); nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype); nft_data_release(nft_set_ext_data(ext), set->dtype);
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) {
nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); struct nft_expr *expr = nft_set_ext_expr(ext);
if (expr->ops->destroy_clone) {
expr->ops->destroy_clone(&ctx, expr);
module_put(expr->ops->type->owner);
} else {
nf_tables_expr_destroy(&ctx, expr);
}
}
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
(*nft_set_ext_obj(ext))->use--; (*nft_set_ext_obj(ext))->use--;
kfree(elem); kfree(elem);
...@@ -4051,12 +4131,13 @@ EXPORT_SYMBOL_GPL(nft_set_elem_destroy); ...@@ -4051,12 +4131,13 @@ EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
/* Only called from commit path, nft_set_elem_deactivate() already deals with /* Only called from commit path, nft_set_elem_deactivate() already deals with
* the refcounting from the preparation phase. * the refcounting from the preparation phase.
*/ */
static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem) static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
const struct nft_set *set, void *elem)
{ {
struct nft_set_ext *ext = nft_set_elem_ext(set, elem); struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext));
kfree(elem); kfree(elem);
} }
...@@ -4787,7 +4868,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, ...@@ -4787,7 +4868,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
kfree(obj->name); kfree(obj->name);
err2: err2:
if (obj->ops->destroy) if (obj->ops->destroy)
obj->ops->destroy(obj); obj->ops->destroy(&ctx, obj);
kfree(obj); kfree(obj);
err1: err1:
module_put(type->owner); module_put(type->owner);
...@@ -4997,10 +5078,10 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, ...@@ -4997,10 +5078,10 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
return err; return err;
} }
static void nft_obj_destroy(struct nft_object *obj) static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
{ {
if (obj->ops->destroy) if (obj->ops->destroy)
obj->ops->destroy(obj); obj->ops->destroy(ctx, obj);
module_put(obj->ops->type->owner); module_put(obj->ops->type->owner);
kfree(obj->name); kfree(obj->name);
...@@ -5966,8 +6047,16 @@ static void nft_chain_commit_update(struct nft_trans *trans) ...@@ -5966,8 +6047,16 @@ static void nft_chain_commit_update(struct nft_trans *trans)
{ {
struct nft_base_chain *basechain; struct nft_base_chain *basechain;
if (nft_trans_chain_name(trans)) if (nft_trans_chain_name(trans)) {
rhltable_remove(&trans->ctx.table->chains_ht,
&trans->ctx.chain->rhlhead,
nft_chain_ht_params);
swap(trans->ctx.chain->name, nft_trans_chain_name(trans)); swap(trans->ctx.chain->name, nft_trans_chain_name(trans));
rhltable_insert_key(&trans->ctx.table->chains_ht,
trans->ctx.chain->name,
&trans->ctx.chain->rhlhead,
nft_chain_ht_params);
}
if (!nft_is_base_chain(trans->ctx.chain)) if (!nft_is_base_chain(trans->ctx.chain))
return; return;
...@@ -5999,11 +6088,12 @@ static void nft_commit_release(struct nft_trans *trans) ...@@ -5999,11 +6088,12 @@ static void nft_commit_release(struct nft_trans *trans)
nft_set_destroy(nft_trans_set(trans)); nft_set_destroy(nft_trans_set(trans));
break; break;
case NFT_MSG_DELSETELEM: case NFT_MSG_DELSETELEM:
nf_tables_set_elem_destroy(nft_trans_elem_set(trans), nf_tables_set_elem_destroy(&trans->ctx,
nft_trans_elem_set(trans),
nft_trans_elem(trans).priv); nft_trans_elem(trans).priv);
break; break;
case NFT_MSG_DELOBJ: case NFT_MSG_DELOBJ:
nft_obj_destroy(nft_trans_obj(trans)); nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
break; break;
case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DELFLOWTABLE:
nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
...@@ -6143,6 +6233,15 @@ static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *cha ...@@ -6143,6 +6233,15 @@ static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *cha
nf_tables_commit_chain_free_rules_old(g0); nf_tables_commit_chain_free_rules_old(g0);
} }
/*
 * Unlink a chain from its table: drop it from the by-name hash table
 * (warning once if that removal reports an error) and from the table's
 * RCU-protected chain list.
 */
static void nft_chain_del(struct nft_chain *chain)
{
	int err;

	err = rhltable_remove(&chain->table->chains_ht, &chain->rhlhead,
			      nft_chain_ht_params);
	WARN_ON_ONCE(err);

	list_del_rcu(&chain->list);
}
static int nf_tables_commit(struct net *net, struct sk_buff *skb) static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{ {
struct nft_trans *trans, *next; struct nft_trans *trans, *next;
...@@ -6217,7 +6316,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) ...@@ -6217,7 +6316,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans); nft_trans_destroy(trans);
break; break;
case NFT_MSG_DELCHAIN: case NFT_MSG_DELCHAIN:
list_del_rcu(&trans->ctx.chain->list); nft_chain_del(trans->ctx.chain);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
nf_tables_unregister_hook(trans->ctx.net, nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table, trans->ctx.table,
...@@ -6328,7 +6427,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) ...@@ -6328,7 +6427,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nft_trans_elem(trans).priv, true); nft_trans_elem(trans).priv, true);
break; break;
case NFT_MSG_NEWOBJ: case NFT_MSG_NEWOBJ:
nft_obj_destroy(nft_trans_obj(trans)); nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
break; break;
case NFT_MSG_NEWFLOWTABLE: case NFT_MSG_NEWFLOWTABLE:
nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
...@@ -6368,7 +6467,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) ...@@ -6368,7 +6467,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans); nft_trans_destroy(trans);
} else { } else {
trans->ctx.table->use--; trans->ctx.table->use--;
list_del_rcu(&trans->ctx.chain->list); nft_chain_del(trans->ctx.chain);
nf_tables_unregister_hook(trans->ctx.net, nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table, trans->ctx.table,
trans->ctx.chain); trans->ctx.chain);
...@@ -6970,7 +7069,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) ...@@ -6970,7 +7069,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
ctx->chain->use--; ctx->chain->use--;
nf_tables_rule_release(ctx, rule); nf_tables_rule_release(ctx, rule);
} }
list_del(&ctx->chain->list); nft_chain_del(ctx->chain);
ctx->table->use--; ctx->table->use--;
nf_tables_chain_destroy(ctx); nf_tables_chain_destroy(ctx);
...@@ -7022,11 +7121,11 @@ static void __nft_release_tables(struct net *net) ...@@ -7022,11 +7121,11 @@ static void __nft_release_tables(struct net *net)
list_for_each_entry_safe(obj, ne, &table->objects, list) { list_for_each_entry_safe(obj, ne, &table->objects, list) {
list_del(&obj->list); list_del(&obj->list);
table->use--; table->use--;
nft_obj_destroy(obj); nft_obj_destroy(&ctx, obj);
} }
list_for_each_entry_safe(chain, nc, &table->chains, list) { list_for_each_entry_safe(chain, nc, &table->chains, list) {
ctx.chain = chain; ctx.chain = chain;
list_del(&chain->list); nft_chain_del(chain);
table->use--; table->use--;
nf_tables_chain_destroy(&ctx); nf_tables_chain_destroy(&ctx);
} }
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_count.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>
/* State shared by the connlimit expression and the connlimit object. */
struct nft_connlimit {
/* held (with BH disabled) around the lookup/add and gc walks of hhead */
spinlock_t lock;
/* list head handed to the nf_conncount_* helpers */
struct hlist_head hhead;
/* threshold the connection count is compared against (count > limit) */
u32 limit;
/* true when NFT_CONNLIMIT_F_INV was requested; flips the comparison */
bool invert;
};
/*
 * Common evaluation path for the connlimit expression and object.
 *
 * Determines the tuple (and zone) of the packet, counts the entries on
 * priv->hhead via nf_conncount_lookup() and, when the lookup asks for it,
 * records the tuple with nf_conncount_add().
 *
 * Verdicts written to @regs:
 *   NF_DROP   - no tuple could be extracted from the packet, or adding
 *               the tuple to the list failed;
 *   NFT_BREAK - (count > limit) XOR invert is true.
 * Otherwise the verdict is left untouched.
 *
 * @ext is accepted for signature compatibility and is not used here.
 */
static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
					 struct nft_regs *regs,
					 const struct nft_pktinfo *pkt,
					 const struct nft_set_ext *ext)
{
	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
	const struct nf_conntrack_tuple *tuple_ptr;
	struct nf_conntrack_tuple tuple;
	enum ip_conntrack_info ctinfo;
	const struct nf_conn *ct;
	unsigned int count;
	bool addit;

	ct = nf_ct_get(pkt->skb, &ctinfo);
	if (ct) {
		/* Tracked packet: use the original-direction tuple and its zone. */
		tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
		zone = nf_ct_zone(ct);
	} else {
		/* Untracked packet: build the tuple from the headers. */
		if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb),
				       nft_pf(pkt), nft_net(pkt), &tuple)) {
			regs->verdict.code = NF_DROP;
			return;
		}
		tuple_ptr = &tuple;
	}

	spin_lock_bh(&priv->lock);
	count = nf_conncount_lookup(nft_net(pkt), &priv->hhead, tuple_ptr, zone,
				    &addit);
	if (addit) {
		if (!nf_conncount_add(&priv->hhead, tuple_ptr)) {
			regs->verdict.code = NF_DROP;
			spin_unlock_bh(&priv->lock);
			return;
		}
		/* The freshly added entry counts as well. */
		count++;
	}
	spin_unlock_bh(&priv->lock);

	if ((count > priv->limit) ^ priv->invert)
		regs->verdict.code = NFT_BREAK;
}
/*
 * Parse the netlink attributes and set up @priv.
 *
 * NFTA_CONNLIMIT_COUNT is mandatory (-EINVAL when absent).
 * NFTA_CONNLIMIT_FLAGS is optional; any bit other than
 * NFT_CONNLIMIT_F_INV yields -EOPNOTSUPP.
 *
 * Returns the result of nf_ct_netns_get() for the context's net/family.
 */
static int nft_connlimit_do_init(const struct nft_ctx *ctx,
				 const struct nlattr * const tb[],
				 struct nft_connlimit *priv)
{
	u32 flags = 0;

	if (!tb[NFTA_CONNLIMIT_COUNT])
		return -EINVAL;

	if (tb[NFTA_CONNLIMIT_FLAGS]) {
		flags = ntohl(nla_get_be32(tb[NFTA_CONNLIMIT_FLAGS]));
		if (flags & ~NFT_CONNLIMIT_F_INV)
			return -EOPNOTSUPP;
	}

	spin_lock_init(&priv->lock);
	INIT_HLIST_HEAD(&priv->hhead);
	priv->limit = ntohl(nla_get_be32(tb[NFTA_CONNLIMIT_COUNT]));
	priv->invert = !!(flags & NFT_CONNLIMIT_F_INV);

	return nf_ct_netns_get(ctx->net, ctx->family);
}
/*
 * Undo nft_connlimit_do_init(): release the conntrack netns reference
 * and free whatever is still on the connection list.
 */
static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
				     struct nft_connlimit *priv)
{
	struct hlist_head *conns = &priv->hhead;

	nf_ct_netns_put(ctx->net, ctx->family);
	nf_conncount_cache_free(conns);
}
/*
 * Emit the configuration of @priv as netlink attributes: the limit is
 * always written, the flags attribute only when the match is inverted.
 *
 * Returns 0 on success, -1 if an attribute could not be added.
 */
static int nft_connlimit_do_dump(struct sk_buff *skb,
				 struct nft_connlimit *priv)
{
	if (nla_put_be32(skb, NFTA_CONNLIMIT_COUNT, htonl(priv->limit)))
		return -1;

	if (!priv->invert)
		return 0;

	if (nla_put_be32(skb, NFTA_CONNLIMIT_FLAGS, htonl(NFT_CONNLIMIT_F_INV)))
		return -1;

	return 0;
}
/* Object ->eval hook: run the shared evaluation on the object's data. */
static inline void nft_connlimit_obj_eval(struct nft_object *obj,
					  struct nft_regs *regs,
					  const struct nft_pktinfo *pkt)
{
	nft_connlimit_do_eval(nft_obj_data(obj), regs, pkt, NULL);
}
/* Object ->init hook: initialise the object's data from the attributes. */
static int nft_connlimit_obj_init(const struct nft_ctx *ctx,
				  const struct nlattr * const tb[],
				  struct nft_object *obj)
{
	return nft_connlimit_do_init(ctx, tb, nft_obj_data(obj));
}
/* Object ->destroy hook: tear down the object's connlimit state. */
static void nft_connlimit_obj_destroy(const struct nft_ctx *ctx,
				      struct nft_object *obj)
{
	nft_connlimit_do_destroy(ctx, nft_obj_data(obj));
}
/*
 * Object ->dump hook: write the object's configuration to @skb.
 * @reset is not used by this object type.
 */
static int nft_connlimit_obj_dump(struct sk_buff *skb,
				  struct nft_object *obj, bool reset)
{
	return nft_connlimit_do_dump(skb, nft_obj_data(obj));
}
/*
 * Netlink attribute policy shared by the connlimit expression and object:
 * both attributes are 32-bit values (read as big-endian by do_init).
 */
static const struct nla_policy nft_connlimit_policy[NFTA_CONNLIMIT_MAX + 1] = {
/* connection limit */
[NFTA_CONNLIMIT_COUNT] = { .type = NLA_U32 },
/* NFT_CONNLIMIT_F_* flags */
[NFTA_CONNLIMIT_FLAGS] = { .type = NLA_U32 },
};
/* Declared early because the ops table and the type refer to each other. */
static struct nft_object_type nft_connlimit_obj_type;

/* Operations of the stateful connlimit object. */
static const struct nft_object_ops nft_connlimit_obj_ops = {
	/* identity */
	.type		= &nft_connlimit_obj_type,
	.size		= sizeof(struct nft_connlimit),
	/* lifetime */
	.init		= nft_connlimit_obj_init,
	.destroy	= nft_connlimit_obj_destroy,
	/* runtime */
	.eval		= nft_connlimit_obj_eval,
	.dump		= nft_connlimit_obj_dump,
};
/* Object type registered for NFT_OBJECT_CONNLIMIT. */
static struct nft_object_type nft_connlimit_obj_type __read_mostly = {
	.owner		= THIS_MODULE,
	.type		= NFT_OBJECT_CONNLIMIT,
	.ops		= &nft_connlimit_obj_ops,
	/* attribute parsing */
	.policy		= nft_connlimit_policy,
	.maxattr	= NFTA_CONNLIMIT_MAX,
};
/* Expression ->eval hook: run the shared evaluation on the private area. */
static void nft_connlimit_eval(const struct nft_expr *expr,
			       struct nft_regs *regs,
			       const struct nft_pktinfo *pkt)
{
	nft_connlimit_do_eval(nft_expr_priv(expr), regs, pkt, NULL);
}
/* Expression ->dump hook: write the expression's configuration to @skb. */
static int nft_connlimit_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
	return nft_connlimit_do_dump(skb, nft_expr_priv(expr));
}
/* Expression ->init hook: initialise the private area from the attributes. */
static int nft_connlimit_init(const struct nft_ctx *ctx,
			      const struct nft_expr *expr,
			      const struct nlattr * const tb[])
{
	return nft_connlimit_do_init(ctx, tb, nft_expr_priv(expr));
}
/* Expression ->destroy hook: tear down the expression's connlimit state. */
static void nft_connlimit_destroy(const struct nft_ctx *ctx,
				  const struct nft_expr *expr)
{
	nft_connlimit_do_destroy(ctx, nft_expr_priv(expr));
}
/*
 * Expression ->clone hook: give @dst the same limit/invert settings as
 * @src, but with its own lock and an empty connection list.
 * Always returns 0.
 */
static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
{
	struct nft_connlimit *from = nft_expr_priv(src);
	struct nft_connlimit *to = nft_expr_priv(dst);

	/* configuration is copied ... */
	to->limit = from->limit;
	to->invert = from->invert;

	/* ... runtime state starts out fresh */
	spin_lock_init(&to->lock);
	INIT_HLIST_HEAD(&to->hhead);

	return 0;
}
/*
 * Expression ->destroy_clone hook: only the connection list is freed.
 * Unlike ->destroy, no nf_ct_netns_put() is done here (the clone hook
 * does not take a netns reference either).
 */
static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
					const struct nft_expr *expr)
{
	struct nft_connlimit *cloned;

	cloned = nft_expr_priv(expr);
	nf_conncount_cache_free(&cloned->hhead);
}
/*
 * Expression ->gc hook.
 *
 * Walks the connection list through nf_conncount_lookup() with a NULL
 * tuple; the returned count is ignored (presumably the walk is wanted
 * for pruning stale entries -- confirm against nf_conncount_lookup()).
 *
 * Returns true when the list is empty afterwards.
 */
static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
{
	struct nft_connlimit *priv = nft_expr_priv(expr);
	bool addit;
	bool empty;

	spin_lock_bh(&priv->lock);
	(void)nf_conncount_lookup(net, &priv->hhead, NULL, &nf_ct_zone_dflt,
				  &addit);
	empty = hlist_empty(&priv->hhead);
	spin_unlock_bh(&priv->lock);

	return empty;
}
/* Declared early because the ops table and the type refer to each other. */
static struct nft_expr_type nft_connlimit_type;

/* Operations of the "connlimit" expression. */
static const struct nft_expr_ops nft_connlimit_ops = {
	/* identity */
	.type		= &nft_connlimit_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_connlimit)),
	/* lifetime */
	.init		= nft_connlimit_init,
	.destroy	= nft_connlimit_destroy,
	.clone		= nft_connlimit_clone,
	.destroy_clone	= nft_connlimit_destroy_clone,
	/* runtime */
	.eval		= nft_connlimit_eval,
	.gc		= nft_connlimit_gc,
	.dump		= nft_connlimit_dump,
};
/* Expression type registered under the name "connlimit". */
static struct nft_expr_type nft_connlimit_type __read_mostly = {
	.owner		= THIS_MODULE,
	.name		= "connlimit",
	.ops		= &nft_connlimit_ops,
	.flags		= NFT_EXPR_STATEFUL | NFT_EXPR_GC,
	/* attribute parsing */
	.policy		= nft_connlimit_policy,
	.maxattr	= NFTA_CONNLIMIT_MAX,
};
/*
 * Module entry point: register the object type first, then the
 * expression type.  If the expression cannot be registered the object
 * type is unregistered again and the error is returned.
 */
static int __init nft_connlimit_module_init(void)
{
	int err = nft_register_obj(&nft_connlimit_obj_type);

	if (err < 0)
		return err;

	err = nft_register_expr(&nft_connlimit_type);
	if (err < 0) {
		nft_unregister_obj(&nft_connlimit_obj_type);
		return err;
	}

	return 0;
}
/*
 * Module exit point: unregister in the reverse order of
 * nft_connlimit_module_init() -- expression type first, then object type.
 */
static void __exit nft_connlimit_module_exit(void)
{
nft_unregister_expr(&nft_connlimit_type);
nft_unregister_obj(&nft_connlimit_obj_type);
}
/* Hook the registration/unregistration functions into module load/unload. */
module_init(nft_connlimit_module_init);
module_exit(nft_connlimit_module_exit);
/* Module description. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso");
/*
 * Aliases for the "connlimit" expression and the NFT_OBJECT_CONNLIMIT
 * object type (presumably what lets nf_tables autoload this module on
 * first use -- confirm against the MODULE_ALIAS_NFT_* definitions).
 */
MODULE_ALIAS_NFT_EXPR("connlimit");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CONNLIMIT);
...@@ -96,7 +96,8 @@ static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv) ...@@ -96,7 +96,8 @@ static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv)
free_percpu(priv->counter); free_percpu(priv->counter);
} }
static void nft_counter_obj_destroy(struct nft_object *obj) static void nft_counter_obj_destroy(const struct nft_ctx *ctx,
struct nft_object *obj)
{ {
struct nft_counter_percpu_priv *priv = nft_obj_data(obj); struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
...@@ -257,6 +258,7 @@ static const struct nft_expr_ops nft_counter_ops = { ...@@ -257,6 +258,7 @@ static const struct nft_expr_ops nft_counter_ops = {
.eval = nft_counter_eval, .eval = nft_counter_eval,
.init = nft_counter_init, .init = nft_counter_init,
.destroy = nft_counter_destroy, .destroy = nft_counter_destroy,
.destroy_clone = nft_counter_destroy,
.dump = nft_counter_dump, .dump = nft_counter_dump,
.clone = nft_counter_clone, .clone = nft_counter_clone,
}; };
......
...@@ -826,7 +826,8 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, ...@@ -826,7 +826,8 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
return 0; return 0;
} }
static void nft_ct_helper_obj_destroy(struct nft_object *obj) static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx,
struct nft_object *obj)
{ {
struct nft_ct_helper_obj *priv = nft_obj_data(obj); struct nft_ct_helper_obj *priv = nft_obj_data(obj);
......
...@@ -195,6 +195,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, ...@@ -195,6 +195,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL)) if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL))
goto err1; goto err1;
if (priv->expr->ops->type->flags & NFT_EXPR_GC) {
if (set->flags & NFT_SET_TIMEOUT)
goto err1;
if (!set->ops->gc_init)
goto err1;
set->ops->gc_init(set);
}
} else if (set->flags & NFT_SET_EVAL) } else if (set->flags & NFT_SET_EVAL)
return -EINVAL; return -EINVAL;
......
...@@ -311,8 +311,16 @@ static void nft_rhash_gc(struct work_struct *work) ...@@ -311,8 +311,16 @@ static void nft_rhash_gc(struct work_struct *work)
continue; continue;
} }
if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPR)) {
struct nft_expr *expr = nft_set_ext_expr(&he->ext);
if (expr->ops->gc &&
expr->ops->gc(read_pnet(&set->net), expr))
goto gc;
}
if (!nft_set_elem_expired(&he->ext)) if (!nft_set_elem_expired(&he->ext))
continue; continue;
gc:
if (nft_set_elem_mark_busy(&he->ext)) if (nft_set_elem_mark_busy(&he->ext))
continue; continue;
...@@ -339,6 +347,14 @@ static unsigned int nft_rhash_privsize(const struct nlattr * const nla[], ...@@ -339,6 +347,14 @@ static unsigned int nft_rhash_privsize(const struct nlattr * const nla[],
return sizeof(struct nft_rhash); return sizeof(struct nft_rhash);
} }
/*
 * Arm the set's garbage-collection work: queue gc_work on the
 * power-efficient system workqueue, delayed by the set's gc interval.
 */
static void nft_rhash_gc_init(const struct nft_set *set)
{
	struct nft_rhash *rhash = nft_set_priv(set);
	unsigned long delay = nft_set_gc_interval(set);

	queue_delayed_work(system_power_efficient_wq, &rhash->gc_work, delay);
}
static int nft_rhash_init(const struct nft_set *set, static int nft_rhash_init(const struct nft_set *set,
const struct nft_set_desc *desc, const struct nft_set_desc *desc,
const struct nlattr * const tb[]) const struct nlattr * const tb[])
...@@ -356,8 +372,8 @@ static int nft_rhash_init(const struct nft_set *set, ...@@ -356,8 +372,8 @@ static int nft_rhash_init(const struct nft_set *set,
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc); INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
if (set->flags & NFT_SET_TIMEOUT) if (set->flags & NFT_SET_TIMEOUT)
queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_rhash_gc_init(set);
nft_set_gc_interval(set));
return 0; return 0;
} }
...@@ -647,6 +663,7 @@ static struct nft_set_type nft_rhash_type __read_mostly = { ...@@ -647,6 +663,7 @@ static struct nft_set_type nft_rhash_type __read_mostly = {
.elemsize = offsetof(struct nft_rhash_elem, ext), .elemsize = offsetof(struct nft_rhash_elem, ext),
.estimate = nft_rhash_estimate, .estimate = nft_rhash_estimate,
.init = nft_rhash_init, .init = nft_rhash_init,
.gc_init = nft_rhash_gc_init,
.destroy = nft_rhash_destroy, .destroy = nft_rhash_destroy,
.insert = nft_rhash_insert, .insert = nft_rhash_insert,
.activate = nft_rhash_activate, .activate = nft_rhash_activate,
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_socket.h> #include <net/netfilter/nf_socket.h>
#include <net/inet_sock.h> #include <net/inet_sock.h>
#include <net/tcp.h>
struct nft_socket { struct nft_socket {
enum nft_socket_keys key:8; enum nft_socket_keys key:8;
...@@ -48,7 +49,7 @@ static void nft_socket_eval(const struct nft_expr *expr, ...@@ -48,7 +49,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
switch(priv->key) { switch(priv->key) {
case NFT_SOCKET_TRANSPARENT: case NFT_SOCKET_TRANSPARENT:
nft_reg_store8(dest, nf_sk_is_transparent(sk)); nft_reg_store8(dest, inet_sk_transparent(sk));
break; break;
default: default:
WARN_ON(1); WARN_ON(1);
......
...@@ -33,264 +33,9 @@ ...@@ -33,264 +33,9 @@
#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif #endif
#include <net/netfilter/nf_tproxy.h>
#include <linux/netfilter/xt_TPROXY.h> #include <linux/netfilter/xt_TPROXY.h>
/* Selects which kind of socket the nf_tproxy_get_sock_v4/v6 helpers look for. */
enum nf_tproxy_lookup_t {
/* search for a listening socket */
NFT_LOOKUP_LISTENER,
/* search for an established connection */
NFT_LOOKUP_ESTABLISHED,
};
/*
 * Report whether @sk is marked transparent, reading the flag that
 * matches the socket's state (timewait, request or full socket).
 *
 * When the socket is NOT transparent the caller's reference is dropped
 * here via sock_gen_put(); when it is transparent the reference is kept.
 */
static bool tproxy_sk_is_transparent(struct sock *sk)
{
	bool transparent;

	switch (sk->sk_state) {
	case TCP_TIME_WAIT:
		transparent = inet_twsk(sk)->tw_transparent;
		break;
	case TCP_NEW_SYN_RECV:
		transparent = inet_rsk(inet_reqsk(sk))->no_srccheck;
		break;
	default:
		transparent = inet_sk(sk)->transparent;
		break;
	}

	if (!transparent)
		sock_gen_put(sk);

	return transparent;
}
/*
 * tproxy_laddr4 - choose the IPv4 address to redirect to
 * @skb:        packet being processed; its ->dev selects the interface
 * @user_laddr: address configured by the user, or 0 for "none"
 * @daddr:      fallback, normally the packet's destination address
 *
 * Preference order: the user supplied address, then the first primary
 * address of the incoming device, then @daddr.
 */
static inline __be32
tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
{
	struct in_device *indev;
	__be32 laddr;

	if (user_laddr)
		return user_laddr;

	/*
	 * The device may have no IPv4 state attached, in which case
	 * __in_dev_get_rcu() returns NULL.  for_primary_ifa() dereferences
	 * its argument, so bail out to the fallback address first.
	 */
	indev = __in_dev_get_rcu(skb->dev);
	if (!indev)
		return daddr;

	laddr = 0;
	for_primary_ifa(indev) {
		/* the first primary address is good enough */
		laddr = ifa->ifa_local;
		break;
	} endfor_ifa(indev);

	return laddr ? laddr : daddr;
}
/*
* This is used when the user wants to intercept a connection matching
* an explicit iptables rule. In this case the sockets are assumed
* matching in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple, it is returned, assuming the redirection
* already took place and we process a packet belonging to an
* established connection
*
* - match: if there's a listening socket matching the redirection
* (e.g. on-port & on-ip of the connection), it is returned,
* regardless of whether it was bound to 0.0.0.0 or to an explicit
* address. The reasoning is that if there's an explicit rule, it
* does not really matter if the listener is bound to an interface
* or to 0. The user already stated that they want redirection
* (since they added the rule).
*
* Please note that there's an overlap between what a TPROXY target
* and a socket match will match. Normally if you have both rules the
* "socket" match will be the first one, effectively all packets
* belonging to established connections going through that one.
*/
/*
 * Look up the IPv4 socket for the given addresses/ports.
 *
 * @hp is only interpreted in the TCP listener case, where it is used as
 * the TCP header to compute the data offset passed to the lookup.
 * @in supplies the interface index used by every lookup.
 *
 * Returns the socket found, or NULL when nothing suitable exists or
 * @protocol is neither TCP nor UDP.  An unknown @lookup_type for TCP
 * hits BUG().
 */
static inline struct sock *
nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
tcph = hp;
sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
ip_hdrlen(skb) +
__tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
in->ifindex, 0);
/* keep the listener only if a reference could be taken on it */
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
break;
case NFT_LOOKUP_ESTABLISHED:
/* NOTE(review): reference handling is left to the lookup helper here -- confirm */
sk = inet_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, dport,
in->ifindex);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
/* UDP sockets are classified by state and bound address */
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
/* reject (and release) a socket that does not fit the requested
* lookup type: "established" needs a connected, non-wildcard
* socket; "listener" needs an unconnected one
*/
if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NFT_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
/* only TCP and UDP are expected here */
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
return sk;
}
#ifdef XT_TPROXY_HAVE_IPV6
/*
 * IPv6 counterpart of nf_tproxy_get_sock_v4().
 *
 * @thoff is the transport header offset; together with the TCP header
 * length it forms the data offset passed to the TCP listener lookup.
 * @hp is only interpreted in the TCP listener case.
 *
 * Returns the socket found, or NULL when nothing suitable exists or
 * @protocol is neither TCP nor UDP.  An unknown @lookup_type for TCP
 * hits BUG().
 */
static inline struct sock *
nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
tcph = hp;
/* note: the destination port is passed in host byte order here */
sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
in->ifindex, 0);
/* keep the listener only if a reference could be taken on it */
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
/* NOTE: we return listeners even if bound to
* the wildcard address, those are filtered out in
* xt_socket, since xt_TPROXY needs wildcard bound
* listeners too
*/
break;
case NFT_LOOKUP_ESTABLISHED:
/* NOTE(review): reference handling is left to the lookup helper here -- confirm */
sk = __inet6_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, ntohs(dport),
in->ifindex, 0);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
/* unlike the TCP lookups above, dport is passed unconverted */
sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
/* UDP sockets are classified by state and bound address */
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
/* NOTE: we return listeners even if bound to
* the wildcard address, those are filtered out in
* xt_socket, since xt_TPROXY needs wildcard bound
* listeners too
*/
/* reject (and release) a socket that does not fit the requested
* lookup type: "established" needs a connected, non-wildcard
* socket; "listener" needs an unconnected one
*/
if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NFT_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
/* only TCP and UDP are expected here */
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
return sk;
}
#endif
/**
 * tproxy_handle_time_wait4 - redirect a SYN that hit an IPv4 TIME_WAIT socket
 * @net:   Network namespace used for the listener lookup.
 * @skb:   The skb being processed.
 * @laddr: IPv4 address to redirect to, or zero to use the packet's daddr.
 * @lport: TCP port to redirect to, or zero to use the packet's dest port.
 * @sk:    The TIME_WAIT TCP socket found by the earlier lookup.
 *
 * A plain SYN (no RST/ACK/FIN) arriving on a TIME_WAIT socket should not
 * reopen the old connection; if a listener exists on the redirect
 * address/port, the new connection is handed to it instead and the
 * TIME_WAIT socket is descheduled and released.
 *
 * The reference on @sk passed in is consumed.
 *
 * Returns the listener when one was found, the original TIME_WAIT socket
 * otherwise, or NULL (after dropping @sk) if the TCP header cannot be read.
 */
static struct sock *
tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
			 __be32 laddr, __be16 lport, struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr _hdr, *hp;
	struct sock *listener;

	hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
	if (!hp) {
		inet_twsk_put(inet_twsk(sk));
		return NULL;
	}

	/* Anything but a pure SYN keeps going to the TIME_WAIT socket. */
	if (!hp->syn || hp->rst || hp->ack || hp->fin)
		return sk;

	listener = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
					 iph->saddr, laddr ? laddr : iph->daddr,
					 hp->source, lport ? lport : hp->dest,
					 skb->dev, NFT_LOOKUP_LISTENER);
	if (!listener)
		return sk;

	/* Hand over to the listener and retire the TIME_WAIT socket. */
	inet_twsk_deschedule_put(inet_twsk(sk));
	return listener;
}
/* assign a socket to the skb -- consumes sk */ /* assign a socket to the skb -- consumes sk */
static void static void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
...@@ -319,26 +64,26 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, ...@@ -319,26 +64,26 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, iph->daddr, iph->saddr, iph->daddr,
hp->source, hp->dest, hp->source, hp->dest,
skb->dev, NFT_LOOKUP_ESTABLISHED); skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
laddr = tproxy_laddr4(skb, laddr, iph->daddr); laddr = nf_tproxy_laddr4(skb, laddr, iph->daddr);
if (!lport) if (!lport)
lport = hp->dest; lport = hp->dest;
/* UDP has no TCP_TIME_WAIT state, so we never enter here */ /* UDP has no TCP_TIME_WAIT state, so we never enter here */
if (sk && sk->sk_state == TCP_TIME_WAIT) if (sk && sk->sk_state == TCP_TIME_WAIT)
/* reopening a TIME_WAIT connection needs special handling */ /* reopening a TIME_WAIT connection needs special handling */
sk = tproxy_handle_time_wait4(net, skb, laddr, lport, sk); sk = nf_tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
else if (!sk) else if (!sk)
/* no, there's no established connection, check if /* no, there's no established connection, check if
* there's a listener on the redirected addr/port */ * there's a listener on the redirected addr/port */
sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr, iph->saddr, laddr,
hp->source, lport, hp->source, lport,
skb->dev, NFT_LOOKUP_LISTENER); skb->dev, NF_TPROXY_LOOKUP_LISTENER);
/* NOTE: assign_sock consumes our sk reference */ /* NOTE: assign_sock consumes our sk reference */
if (sk && tproxy_sk_is_transparent(sk)) { if (sk && nf_tproxy_sk_is_transparent(sk)) {
/* This should be in a separate target, but we don't do multiple /* This should be in a separate target, but we don't do multiple
targets on the same rule yet */ targets on the same rule yet */
skb->mark = (skb->mark & ~mark_mask) ^ mark_value; skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
...@@ -377,87 +122,6 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -377,87 +122,6 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
#ifdef XT_TPROXY_HAVE_IPV6 #ifdef XT_TPROXY_HAVE_IPV6
/*
 * Pick the local IPv6 address a packet should be redirected to.
 *
 * A user-configured address wins whenever it is not the unspecified
 * address.  Otherwise the address list of the inet6 device attached to
 * skb->dev is scanned, under the device's read lock, for the first entry
 * that is neither tentative nor deprecated.  If the device has no inet6
 * state, or no entry qualifies, the packet's own destination address
 * (@daddr) is returned.
 *
 * NOTE(review): when an interface address is chosen, the returned pointer
 * refers into that address entry after the lock has been released --
 * confirm the calling context keeps the entry alive.
 */
static inline const struct in6_addr *
tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
	      const struct in6_addr *daddr)
{
	const struct in6_addr *chosen = daddr;
	struct inet6_ifaddr *entry;
	struct inet6_dev *idev;

	if (!ipv6_addr_any(user_laddr))
		return user_laddr;

	idev = __in6_dev_get(skb->dev);
	if (!idev)
		return daddr;

	read_lock_bh(&idev->lock);
	list_for_each_entry(entry, &idev->addr_list, if_list) {
		/* Take the first address that is fully usable. */
		if (!(entry->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))) {
			chosen = &entry->addr;
			break;
		}
	}
	read_unlock_bh(&idev->lock);

	return chosen;
}
/**
* tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections
* @skb: The skb being processed.
* @tproto: Transport protocol.
* @thoff: Transport protocol header offset.
* @par: Iptables target parameters.
* @sk: The TIME_WAIT TCP socket found by the lookup.
*
* We have to handle SYN packets arriving to TIME_WAIT sockets
* differently: instead of reopening the connection we should rather
* redirect the new connection to the proxy if there's a listener
* socket present.
*
* tproxy_handle_time_wait6() consumes the socket reference passed in.
*
* Returns the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
static struct sock *
tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
const struct xt_action_param *par,
struct sock *sk)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr _hdr, *hp;
const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
if (hp == NULL) {
inet_twsk_put(inet_twsk(sk));
return NULL;
}
if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
/* SYN to a TIME_WAIT socket, we'd rather redirect it
* to a listener socket if there's one */
struct sock *sk2;
sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
&iph->saddr,
tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
hp->source,
tgi->lport ? tgi->lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
}
}
return sk;
}
static unsigned int static unsigned int
tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
{ {
...@@ -489,25 +153,31 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -489,25 +153,31 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
&iph->saddr, &iph->daddr, &iph->saddr, &iph->daddr,
hp->source, hp->dest, hp->source, hp->dest,
xt_in(par), NFT_LOOKUP_ESTABLISHED); xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED);
laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); laddr = nf_tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
lport = tgi->lport ? tgi->lport : hp->dest; lport = tgi->lport ? tgi->lport : hp->dest;
/* UDP has no TCP_TIME_WAIT state, so we never enter here */ /* UDP has no TCP_TIME_WAIT state, so we never enter here */
if (sk && sk->sk_state == TCP_TIME_WAIT) if (sk && sk->sk_state == TCP_TIME_WAIT) {
const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
/* reopening a TIME_WAIT connection needs special handling */ /* reopening a TIME_WAIT connection needs special handling */
sk = tproxy_handle_time_wait6(skb, tproto, thoff, par, sk); sk = nf_tproxy_handle_time_wait6(skb, tproto, thoff,
xt_net(par),
&tgi->laddr.in6,
tgi->lport,
sk);
}
else if (!sk) else if (!sk)
/* no there's no established connection, check if /* no there's no established connection, check if
* there's a listener on the redirected addr/port */ * there's a listener on the redirected addr/port */
sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
tproto, &iph->saddr, laddr, tproto, &iph->saddr, laddr,
hp->source, lport, hp->source, lport,
xt_in(par), NFT_LOOKUP_LISTENER); xt_in(par), NF_TPROXY_LOOKUP_LISTENER);
/* NOTE: assign_sock consumes our sk reference */ /* NOTE: assign_sock consumes our sk reference */
if (sk && tproxy_sk_is_transparent(sk)) { if (sk && nf_tproxy_sk_is_transparent(sk)) {
/* This should be in a separate target, but we don't do multiple /* This should be in a separate target, but we don't do multiple
targets on the same rule yet */ targets on the same rule yet */
skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
......
...@@ -73,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, ...@@ -73,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
* if XT_SOCKET_TRANSPARENT is used * if XT_SOCKET_TRANSPARENT is used
*/ */
if (info->flags & XT_SOCKET_TRANSPARENT) if (info->flags & XT_SOCKET_TRANSPARENT)
transparent = nf_sk_is_transparent(sk); transparent = inet_sk_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk)) transparent && sk_fullsock(sk))
...@@ -130,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) ...@@ -130,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
* if XT_SOCKET_TRANSPARENT is used * if XT_SOCKET_TRANSPARENT is used
*/ */
if (info->flags & XT_SOCKET_TRANSPARENT) if (info->flags & XT_SOCKET_TRANSPARENT)
transparent = nf_sk_is_transparent(sk); transparent = inet_sk_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk)) transparent && sk_fullsock(sk))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment