Commit 9000a457 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for your net-next tree:

1) Support for matching on ipsec policy already set in the route, from
   Florian Westphal.

2) Split set destruction into deactivate and destroy phases to make it
   fit better into the transaction infrastructure, also from Florian.
   This includes a patch to warn on imbalance between the new
   activate and deactivate interfaces.

3) Release the transaction list from a workqueue to remove the expensive
   synchronize_rcu() call from the configuration plane path. This speeds
   up the configuration plane quite a bit. From Florian Westphal. The
   pattern is sketched after this list.

4) Add a new xfrm/ipsec extension that allows matching on ipsec tunnel
   keys such as source and destination address, SPI and reqid. From
   Máté Eckl and Florian Westphal. See the secpath sketch after this list.

5) Add secmark support, including connsecmark, patches
   from Christian Gottsche.

6) Allow specifying the remaining bytes in xt_quota, from Chenbo Feng,
   with a follow-up patch to silence a clang warning, from Nathan
   Chancellor. The accounting is sketched after this list.

7) Flush conntrack entries based on layer 3 family, from Kristian Evensen.

8) New revision for cgroups2 to shrink the path field.

9) Get rid of the obsolete need_conntrack(), a leftover from the recent
   demodularization work.

10) Use WARN_ON instead of BUG_ON, from Florian Westphal.

11) Remove the unused export of nf_nat_ipv4_fn(), from Florian.

12) Remove superfluous checks in the timeout netlink parser and dump
    functions of the layer 4 conntrack helpers.

13) Remove redundant rcu read-side locks in NAT redirect,
    from Taehee Yoo.

14) Pass nf_hook_state structure to error handlers, patch from
    Florian Westphal.

15) Remove the ->new() interface from the layer 4 protocol trackers and
    fold its logic into the ->packet() interface. From Florian. See the
    combined-callback sketch after this list.

16) Move conntrack ->error() handling into the ->packet() interface.
    Patches from Florian Westphal. Covered by the same sketch below.

17) Remove unused parameter in the pernet initialization path,
    also from Florian.

18) Remove the extra parameter that specified the layer 3 protocol when
    looking up the protocol tracker. From Florian.

19) Shrink array of layer 4 protocol trackers, from Florian.

20) Check for a linear skb only once in the ALG NAT mangling
    code, from Taehee Yoo.

21) Use rhashtable_walk_enter() instead of the deprecated
    rhashtable_walk_init(), also from Taehee. Sketched after this list.

22) No need to flush all conntracks when a single address
    is gone, from Tan Hu.

23) Remove redundant check for NAT flags in flowtable code, from
    Taehee Yoo.

24) Use rhashtable_lookup() instead of rhashtable_lookup_fast() in the
    netfilter codebase, since the rcu read-side lock is already held in
    this path. Sketched after this list.
====================
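
For 3), a minimal sketch of the pattern with illustrative names rather than
the actual nf_tables ones: retired transactions are batched on a list, and
the rcu grace period is waited for in a worker, off the configuration path.

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

static LIST_HEAD(retired_trans);	/* hypothetical batch list */
static DEFINE_SPINLOCK(retired_lock);

static void trans_destroy_work(struct work_struct *w)
{
	LIST_HEAD(head);

	spin_lock(&retired_lock);
	list_splice_init(&retired_trans, &head);
	spin_unlock(&retired_lock);

	if (list_empty(&head))
		return;

	synchronize_rcu();	/* the grace period is now paid for here */

	/* ... free each transaction queued on "head" ... */
}
static DECLARE_WORK(trans_work, trans_destroy_work);

/* commit path: queue and return instead of synchronize_rcu() + free */
static void trans_list_retire(struct list_head *trans_list)
{
	spin_lock(&retired_lock);
	list_splice_tail_init(trans_list, &retired_trans);
	spin_unlock(&retired_lock);
	schedule_work(&trans_work);
}
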
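For 4), a hedged sketch of what the input-direction match has to read: the
skb's secpath holds the xfrm states whose addresses, reqid and SPI the new
NFT_XFRM_KEY_* values expose, with the index mirroring NFTA_XFRM_SPNUM. The
helper name and signature are illustrative, not the nft_xfrm.c internals.

#include <linux/skbuff.h>
#include <net/xfrm.h>

static bool xfrm_in_key_sketch(struct sk_buff *skb, unsigned int spnum,
			       u32 *reqid, __be32 *spi)
{
	const struct sec_path *sp = skb_sec_path(skb);
	const struct xfrm_state *x;

	if (!sp || spnum >= sp->len)
		return false;		/* no SA at that secpath index */

	x = sp->xvec[spnum];
	*reqid = x->props.reqid;	/* NFT_XFRM_KEY_REQID */
	*spi = x->id.spi;		/* NFT_XFRM_KEY_SPI */
	return true;
}
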
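For 6), the trick that makes the remaining byte count reportable is visible
in the xt_quota.h hunk below: the kernel-side atomic64_t counter shares its
slot with the userspace-visible "remain" field, so a dump can hand back
whatever is left. A sketch of the match-side accounting, assuming the
existing XT_QUOTA_INVERT semantics; the helper name is hypothetical.

#include <linux/atomic.h>
#include <linux/netfilter/xt_quota.h>

static bool quota_mt_sketch(struct xt_quota_info *q, unsigned int pkt_len)
{
	bool under = atomic64_sub_return(pkt_len, &q->counter) >= 0;

	/* match while under quota; XT_QUOTA_INVERT flips the result */
	return under ^ !!(q->flags & XT_QUOTA_INVERT);
}
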
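For 15) and 16), the shape of the change as distilled from the dccp/gre/icmp
hunks below: the former ->error() checks run first inside ->packet(), and the
former ->new() initialization runs on the first packet of a not-yet-confirmed
conntrack. proto_error() and proto_new() stand in for the per-protocol
helpers.

static int proto_packet(struct nf_conn *ct, struct sk_buff *skb,
			unsigned int dataoff, enum ip_conntrack_info ctinfo,
			const struct nf_hook_state *state)
{
	if (proto_error(skb, dataoff, state))	/* former ->error() body */
		return -NF_ACCEPT;

	if (!nf_ct_is_confirmed(ct) &&		/* former ->new() body */
	    !proto_new(ct, skb, dataoff))
		return -NF_ACCEPT;

	/* ... regular per-packet state tracking ... */
	return NF_ACCEPT;
}
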
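For 21), a self-contained sketch of the current walker API:
rhashtable_walk_enter() cannot fail and takes no gfp_t, which is why the old
int-returning rhashtable_walk_init() was deprecated. struct my_obj is a
hypothetical entry type.

#include <linux/err.h>
#include <linux/rhashtable.h>

struct my_obj {
	u32 key;
	struct rhash_head node;
};

static void walk_table(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	struct my_obj *obj;

	rhashtable_walk_enter(ht, &iter); /* replaces rhashtable_walk_init() */
	rhashtable_walk_start(&iter);

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)	/* resize; keep walking */
				continue;
			break;
		}
		/* ... inspect obj->key ... */
	}

	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);
}
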
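For 24), the difference in one helper: rhashtable_lookup_fast() wraps the
hash lookup in its own rcu_read_lock()/unlock() pair, while
rhashtable_lookup() expects the caller to hold it, which netfilter's packet
paths already do. This reuses the hypothetical struct my_obj from the
previous sketch; my_params is likewise a hypothetical rhashtable_params.

static const struct rhashtable_params my_params = {
	.key_len	= sizeof(u32),
	.key_offset	= offsetof(struct my_obj, key),
	.head_offset	= offsetof(struct my_obj, node),
};

/* caller already holds rcu_read_lock(), as netfilter packet paths do */
static struct my_obj *lookup_obj(struct rhashtable *ht, const u32 *key)
{
	return rhashtable_lookup(ht, key, my_params);
}
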
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 68049a5f ffa0a9a5
......@@ -19,7 +19,4 @@ struct ip_conntrack_stat {
unsigned int search_restart;
};
/* call to create an explicit dependency on nf_conntrack. */
void need_conntrack(void);
#endif /* _NF_CONNTRACK_COMMON_H */
......@@ -10,20 +10,17 @@
#ifndef _NF_CONNTRACK_IPV4_H
#define _NF_CONNTRACK_IPV4_H
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
#ifdef CONFIG_NF_CT_PROTO_DCCP
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp;
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp;
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite;
#endif
int nf_conntrack_ipv4_compat_init(void);
void nf_conntrack_ipv4_compat_fini(void);
#endif /*_NF_CONNTRACK_IPV4_H*/
......@@ -2,20 +2,7 @@
#ifndef _NF_CONNTRACK_IPV6_H
#define _NF_CONNTRACK_IPV6_H
extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
#ifdef CONFIG_NF_CT_PROTO_DCCP
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6;
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6;
#endif
#include <linux/sysctl.h>
extern struct ctl_table nf_ct_ipv6_sysctl_table[];
......
......@@ -20,8 +20,7 @@
/* This header is used to share core functionality between the
standalone connection tracking module, and the compatibility layer's use
of connection tracking. */
unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb);
unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state);
int nf_conntrack_init_net(struct net *net);
void nf_conntrack_cleanup_net(struct net *net);
......
......@@ -18,9 +18,6 @@
struct seq_file;
struct nf_conntrack_l4proto {
/* L3 Protocol number. */
u_int16_t l3proto;
/* L4 Protocol number. */
u_int8_t l4proto;
......@@ -43,22 +40,14 @@ struct nf_conntrack_l4proto {
/* Returns verdict for packet, or -1 for invalid. */
int (*packet)(struct nf_conn *ct,
const struct sk_buff *skb,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo);
/* Called when a new connection for this protocol found;
* returns TRUE if it's OK. If so, packet() called next. */
bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff);
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state);
/* Called when a conntrack entry is destroyed */
void (*destroy)(struct nf_conn *ct);
int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
unsigned int dataoff,
u_int8_t pf, unsigned int hooknum);
/* called by gc worker if table is full */
bool (*can_early_drop)(const struct nf_conn *ct);
......@@ -92,7 +81,7 @@ struct nf_conntrack_l4proto {
#endif
unsigned int *net_id;
/* Init l4proto pernet data */
int (*init_net)(struct net *net, u_int16_t proto);
int (*init_net)(struct net *net);
/* Return the per-net protocol part. */
struct nf_proto_net *(*get_net_proto)(struct net *net);
......@@ -101,16 +90,23 @@ struct nf_conntrack_l4proto {
struct module *me;
};
int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
const struct nf_hook_state *state);
int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
const struct nf_hook_state *state);
/* Existing built-in generic protocol */
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
#define MAX_NF_CT_PROTO 256
#define MAX_NF_CT_PROTO IPPROTO_UDPLITE
const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
u_int8_t l4proto);
const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto);
const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
u_int8_t l4proto);
const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto);
void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p);
/* Protocol pernet registration. */
......
......@@ -470,6 +470,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding);
void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding);
void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding);
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
/**
* enum nft_set_extensions - set extension type IDs
......@@ -724,7 +727,9 @@ struct nft_expr_type {
* @eval: Expression evaluation function
* @size: full expression size, including private data size
* @init: initialization function
* @destroy: destruction function
* @activate: activate expression in the next generation
* @deactivate: deactivate expression in next generation
* @destroy: destruction function, called after synchronize_rcu
* @dump: function to dump parameters
* @type: expression type
* @validate: validate expression, called during loop detection
......@@ -1293,12 +1298,14 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
*
* @list: used internally
* @msg_type: message type
* @put_net: ctx->net needs to be put
* @ctx: transaction context
* @data: internal information related to the transaction
*/
struct nft_trans {
struct list_head list;
int msg_type;
bool put_net;
struct nft_ctx ctx;
char data[0];
};
......
......@@ -16,6 +16,10 @@ extern struct nft_expr_type nft_meta_type;
extern struct nft_expr_type nft_rt_type;
extern struct nft_expr_type nft_exthdr_type;
#ifdef CONFIG_NETWORK_SECMARK
extern struct nft_object_type nft_secmark_obj_type;
#endif
int nf_tables_core_module_init(void);
void nf_tables_core_module_exit(void);
......
......@@ -826,12 +826,14 @@ enum nft_meta_keys {
* @NFT_RT_NEXTHOP4: routing nexthop for IPv4
* @NFT_RT_NEXTHOP6: routing nexthop for IPv6
* @NFT_RT_TCPMSS: fetch current path tcp mss
* @NFT_RT_XFRM: boolean, skb->dst->xfrm != NULL
*/
enum nft_rt_keys {
NFT_RT_CLASSID,
NFT_RT_NEXTHOP4,
NFT_RT_NEXTHOP6,
NFT_RT_TCPMSS,
NFT_RT_XFRM,
__NFT_RT_MAX
};
#define NFT_RT_MAX (__NFT_RT_MAX - 1)
......@@ -1174,6 +1176,21 @@ enum nft_quota_attributes {
};
#define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1)
/**
* enum nft_secmark_attributes - nf_tables secmark object netlink attributes
*
* @NFTA_SECMARK_CTX: security context (NLA_STRING)
*/
enum nft_secmark_attributes {
NFTA_SECMARK_UNSPEC,
NFTA_SECMARK_CTX,
__NFTA_SECMARK_MAX,
};
#define NFTA_SECMARK_MAX (__NFTA_SECMARK_MAX - 1)
/* Max security context length */
#define NFT_SECMARK_CTX_MAXLEN 256
/**
* enum nft_reject_types - nf_tables reject expression reject types
*
......@@ -1430,7 +1447,8 @@ enum nft_ct_timeout_timeout_attributes {
#define NFT_OBJECT_CONNLIMIT 5
#define NFT_OBJECT_TUNNEL 6
#define NFT_OBJECT_CT_TIMEOUT 7
#define __NFT_OBJECT_MAX 8
#define NFT_OBJECT_SECMARK 8
#define __NFT_OBJECT_MAX 9
#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1)
/**
......@@ -1512,6 +1530,35 @@ enum nft_devices_attributes {
};
#define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1)
/*
* enum nft_xfrm_attributes - nf_tables xfrm expr netlink attributes
*
* @NFTA_XFRM_DREG: destination register (NLA_U32)
* @NFTA_XFRM_KEY: enum nft_xfrm_keys (NLA_U32)
* @NFTA_XFRM_DIR: direction (NLA_U8)
* @NFTA_XFRM_SPNUM: index in secpath array (NLA_U32)
*/
enum nft_xfrm_attributes {
NFTA_XFRM_UNSPEC,
NFTA_XFRM_DREG,
NFTA_XFRM_KEY,
NFTA_XFRM_DIR,
NFTA_XFRM_SPNUM,
__NFTA_XFRM_MAX
};
#define NFTA_XFRM_MAX (__NFTA_XFRM_MAX - 1)
enum nft_xfrm_keys {
NFT_XFRM_KEY_UNSPEC,
NFT_XFRM_KEY_DADDR_IP4,
NFT_XFRM_KEY_DADDR_IP6,
NFT_XFRM_KEY_SADDR_IP4,
NFT_XFRM_KEY_SADDR_IP6,
NFT_XFRM_KEY_REQID,
NFT_XFRM_KEY_SPI,
__NFT_XFRM_KEY_MAX,
};
#define NFT_XFRM_KEY_MAX (__NFT_XFRM_KEY_MAX - 1)
/**
* enum nft_trace_attributes - nf_tables trace netlink attributes
......
......@@ -22,4 +22,20 @@ struct xt_cgroup_info_v1 {
void *priv __attribute__((aligned(8)));
};
#define XT_CGROUP_PATH_MAX 512
struct xt_cgroup_info_v2 {
__u8 has_path;
__u8 has_classid;
__u8 invert_path;
__u8 invert_classid;
union {
char path[XT_CGROUP_PATH_MAX];
__u32 classid;
};
/* kernel internal data */
void *priv __attribute__((aligned(8)));
};
#endif /* _UAPI_XT_CGROUP_H */
......@@ -15,9 +15,11 @@ struct xt_quota_info {
__u32 flags;
__u32 pad;
__aligned_u64 quota;
/* Used internally by the kernel */
struct xt_quota_priv *master;
#ifdef __KERNEL__
atomic64_t counter;
#else
__aligned_u64 remain;
#endif
};
#endif /* _XT_QUOTA_H */
......@@ -264,7 +264,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
return nf_nat_inet_fn(priv, skb, state);
}
EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn);
static unsigned int
nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
......
......@@ -104,12 +104,26 @@ static int masq_device_event(struct notifier_block *this,
return NOTIFY_DONE;
}
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
struct net_device *dev = ifa->ifa_dev->dev;
struct nf_conntrack_tuple *tuple;
if (!device_cmp(ct, (void *)(long)dev->ifindex))
return 0;
tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
return ifa->ifa_address == tuple->dst.u3.ip;
}
static int masq_inet_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
struct netdev_notifier_info info;
struct net *net = dev_net(idev->dev);
/* The masq_dev_notifier will catch the case of the device going
* down. So if the inetdev is dead and being destroyed we have
......@@ -119,8 +133,10 @@ static int masq_inet_event(struct notifier_block *this,
if (idev->dead)
return NOTIFY_DONE;
netdev_notifier_info_init(&info, idev->dev);
return masq_device_event(this, event, &info);
if (event == NETDEV_DOWN)
nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
return NOTIFY_DONE;
}
static struct notifier_block masq_dev_notifier = {
......
......@@ -65,7 +65,10 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
}
hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
BUG_ON(hp == NULL);
if (!hp) {
par->hotdrop = true;
return false;
}
/* Calculate the header length */
if (nexthdr == NEXTHDR_FRAGMENT)
......
......@@ -137,7 +137,10 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
sizeof(_addr),
&_addr);
BUG_ON(ap == NULL);
if (ap == NULL) {
par->hotdrop = true;
return false;
}
if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
pr_debug("i=%d temp=%d;\n", i, temp);
......@@ -166,7 +169,10 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+ temp * sizeof(_addr),
sizeof(_addr),
&_addr);
BUG_ON(ap == NULL);
if (ap == NULL) {
par->hotdrop = true;
return false;
}
if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
break;
......
......@@ -87,18 +87,30 @@ static struct notifier_block masq_dev_notifier = {
struct masq_dev_work {
struct work_struct work;
struct net *net;
struct in6_addr addr;
int ifindex;
};
static int inet_cmp(struct nf_conn *ct, void *work)
{
struct masq_dev_work *w = (struct masq_dev_work *)work;
struct nf_conntrack_tuple *tuple;
if (!device_cmp(ct, (void *)(long)w->ifindex))
return 0;
tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
}
static void iterate_cleanup_work(struct work_struct *work)
{
struct masq_dev_work *w;
long index;
w = container_of(work, struct masq_dev_work, work);
index = w->ifindex;
nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0);
nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0);
put_net(w->net);
kfree(w);
......@@ -147,6 +159,7 @@ static int masq_inet_event(struct notifier_block *this,
INIT_WORK(&w->work, iterate_cleanup_work);
w->ifindex = dev->ifindex;
w->net = net;
w->addr = ifa->addr;
schedule_work(&w->work);
return NOTIFY_DONE;
......
......@@ -625,6 +625,13 @@ config NFT_FIB_INET
The lookup will be delegated to the IPv4 or IPv6 FIB depending
on the protocol of the packet.
config NFT_XFRM
tristate "Netfilter nf_tables xfrm/IPSec security association matching"
depends on XFRM
help
This option adds an expression that you can use to extract properties
of a packet's security association.
config NFT_SOCKET
tristate "Netfilter nf_tables socket match support"
depends on IPV6 || IPV6=n
......
......@@ -113,6 +113,7 @@ obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
obj-$(CONFIG_NFT_SOCKET) += nft_socket.o
obj-$(CONFIG_NFT_OSF) += nft_osf.o
obj-$(CONFIG_NFT_TPROXY) += nft_tproxy.o
obj-$(CONFIG_NFT_XFRM) += nft_xfrm.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
......
......@@ -379,7 +379,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
return false;
}
l4proto = __nf_ct_l4proto_find(l3num, protonum);
l4proto = __nf_ct_l4proto_find(protonum);
ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
l4proto);
......@@ -539,7 +539,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_ct_tmpl_free(ct);
return;
}
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
if (l4proto->destroy)
l4proto->destroy(ct);
......@@ -840,7 +840,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
enum ip_conntrack_info oldinfo;
struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
if (l4proto->allow_clash &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
......@@ -1109,7 +1109,7 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
return true;
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
return true;
......@@ -1370,12 +1370,6 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
if (!l4proto->new(ct, skb, dataoff)) {
nf_conntrack_free(ct);
pr_debug("can't track with proto module\n");
return NULL;
}
if (timeout_ext)
nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
GFP_ATOMIC);
......@@ -1436,12 +1430,12 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
/* On success, returns 0, sets skb->_nfct | ctinfo */
static int
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
resolve_normal_ct(struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
const struct nf_conntrack_l4proto *l4proto)
const struct nf_conntrack_l4proto *l4proto,
const struct nf_hook_state *state)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
......@@ -1452,17 +1446,18 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, net, &tuple, l4proto)) {
dataoff, state->pf, protonum, state->net,
&tuple, l4proto)) {
pr_debug("Can't get tuple\n");
return 0;
}
/* look for tuple match */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
hash = hash_conntrack_raw(&tuple, net);
h = __nf_conntrack_find_get(net, zone, &tuple, hash);
hash = hash_conntrack_raw(&tuple, state->net);
h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
if (!h) {
h = init_conntrack(net, tmpl, &tuple, l4proto,
h = init_conntrack(state->net, tmpl, &tuple, l4proto,
skb, dataoff, hash);
if (!h)
return 0;
......@@ -1491,13 +1486,45 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
return 0;
}
/*
* icmp packets need special treatment to handle error messages that are
* related to a connection.
*
* Callers need to check if skb has a conntrack assigned when this
* helper returns; in such case skb belongs to an already known connection.
*/
static unsigned int __cold
nf_conntrack_handle_icmp(struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
u8 protonum,
const struct nf_hook_state *state)
{
int ret;
if (state->pf == NFPROTO_IPV4 && protonum == IPPROTO_ICMP)
ret = nf_conntrack_icmpv4_error(tmpl, skb, dataoff, state);
#if IS_ENABLED(CONFIG_IPV6)
else if (state->pf == NFPROTO_IPV6 && protonum == IPPROTO_ICMPV6)
ret = nf_conntrack_icmpv6_error(tmpl, skb, dataoff, state);
#endif
else
return NF_ACCEPT;
if (ret <= 0) {
NF_CT_STAT_INC_ATOMIC(state->net, error);
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
}
return ret;
}
unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
{
const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct, *tmpl;
u_int8_t protonum;
int dataoff, ret;
......@@ -1506,32 +1533,28 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
/* Previously seen (loopback or untracked)? Ignore. */
if ((tmpl && !nf_ct_is_template(tmpl)) ||
ctinfo == IP_CT_UNTRACKED) {
NF_CT_STAT_INC_ATOMIC(net, ignore);
NF_CT_STAT_INC_ATOMIC(state->net, ignore);
return NF_ACCEPT;
}
skb->_nfct = 0;
}
/* rcu_read_lock()ed by nf_hook_thresh */
dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum);
if (dataoff <= 0) {
pr_debug("not prepared to track yet or error occurred\n");
NF_CT_STAT_INC_ATOMIC(net, error);
NF_CT_STAT_INC_ATOMIC(net, invalid);
NF_CT_STAT_INC_ATOMIC(state->net, error);
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
ret = NF_ACCEPT;
goto out;
}
l4proto = __nf_ct_l4proto_find(pf, protonum);
l4proto = __nf_ct_l4proto_find(protonum);
/* It may be an special packet, error, unclean...
* inverse of the return code tells to the netfilter
* core what to do with the packet. */
if (l4proto->error != NULL) {
ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum);
if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) {
ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff,
protonum, state);
if (ret <= 0) {
NF_CT_STAT_INC_ATOMIC(net, error);
NF_CT_STAT_INC_ATOMIC(net, invalid);
ret = -ret;
goto out;
}
......@@ -1540,10 +1563,11 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
goto out;
}
repeat:
ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
ret = resolve_normal_ct(tmpl, skb, dataoff,
protonum, l4proto, state);
if (ret < 0) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(net, drop);
NF_CT_STAT_INC_ATOMIC(state->net, drop);
ret = NF_DROP;
goto out;
}
......@@ -1551,21 +1575,21 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
ct = nf_ct_get(skb, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
NF_CT_STAT_INC_ATOMIC(net, invalid);
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
ret = NF_ACCEPT;
goto out;
}
ret = l4proto->packet(ct, skb, dataoff, ctinfo);
ret = l4proto->packet(ct, skb, dataoff, ctinfo, state);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_conntrack_put(&ct->ct_general);
skb->_nfct = 0;
NF_CT_STAT_INC_ATOMIC(net, invalid);
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
if (ret == -NF_DROP)
NF_CT_STAT_INC_ATOMIC(net, drop);
NF_CT_STAT_INC_ATOMIC(state->net, drop);
/* Special case: TCP tracker reports an attempt to reopen a
* closed/aborted connection. We have to go back and create a
* fresh conntrack.
......@@ -1594,8 +1618,7 @@ bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
rcu_read_lock();
ret = nf_ct_invert_tuple(inverse, orig,
__nf_ct_l4proto_find(orig->src.l3num,
orig->dst.protonum));
__nf_ct_l4proto_find(orig->dst.protonum));
rcu_read_unlock();
return ret;
}
......@@ -1752,7 +1775,7 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
if (dataoff <= 0)
return -1;
l4proto = nf_ct_l4proto_find_get(l3num, l4num);
l4proto = nf_ct_l4proto_find_get(l4num);
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
l4num, net, &tuple, l4proto))
......
......@@ -610,8 +610,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
expect->tuple.src.l3num,
expect->tuple.dst.protonum);
print_tuple(s, &expect->tuple,
__nf_ct_l4proto_find(expect->tuple.src.l3num,
expect->tuple.dst.protonum));
__nf_ct_l4proto_find(expect->tuple.dst.protonum));
if (expect->flags & NF_CT_EXPECT_PERMANENT) {
seq_puts(s, "PERMANENT");
......
......@@ -135,8 +135,7 @@ static int ctnetlink_dump_tuples(struct sk_buff *skb,
ret = ctnetlink_dump_tuples_ip(skb, tuple);
if (ret >= 0) {
l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
tuple->dst.protonum);
l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto);
}
rcu_read_unlock();
......@@ -184,7 +183,7 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
struct nlattr *nest_proto;
int ret;
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
if (!l4proto->to_nlattr)
return 0;
......@@ -592,7 +591,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
len *= 3u; /* ORIG, REPLY, MASTER */
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
len += l4proto->nlattr_size;
if (l4proto->nlattr_tuple_size) {
len4 = l4proto->nlattr_tuple_size();
......@@ -821,6 +820,7 @@ static int ctnetlink_done(struct netlink_callback *cb)
}
struct ctnetlink_filter {
u8 family;
struct {
u_int32_t val;
u_int32_t mask;
......@@ -828,31 +828,39 @@ struct ctnetlink_filter {
};
static struct ctnetlink_filter *
ctnetlink_alloc_filter(const struct nlattr * const cda[])
ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
{
#ifdef CONFIG_NF_CONNTRACK_MARK
struct ctnetlink_filter *filter;
#ifndef CONFIG_NF_CONNTRACK_MARK
if (cda[CTA_MARK] && cda[CTA_MARK_MASK])
return ERR_PTR(-EOPNOTSUPP);
#endif
filter = kzalloc(sizeof(*filter), GFP_KERNEL);
if (filter == NULL)
return ERR_PTR(-ENOMEM);
filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
filter->family = family;
return filter;
#else
return ERR_PTR(-EOPNOTSUPP);
#ifdef CONFIG_NF_CONNTRACK_MARK
if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
}
#endif
return filter;
}
static int ctnetlink_start(struct netlink_callback *cb)
{
const struct nlattr * const *cda = cb->data;
struct ctnetlink_filter *filter = NULL;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u8 family = nfmsg->nfgen_family;
if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
filter = ctnetlink_alloc_filter(cda);
if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
filter = ctnetlink_alloc_filter(cda, family);
if (IS_ERR(filter))
return PTR_ERR(filter);
}
......@@ -866,13 +874,24 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
struct ctnetlink_filter *filter = data;
if (filter == NULL)
return 1;
goto out;
/* Match entries of a given L3 protocol number.
* If it is not specified, ie. l3proto == 0,
* then match everything.
*/
if (filter->family && nf_ct_l3num(ct) != filter->family)
goto ignore_entry;
#ifdef CONFIG_NF_CONNTRACK_MARK
if ((ct->mark & filter->mark.mask) == filter->mark.val)
return 1;
if ((ct->mark & filter->mark.mask) != filter->mark.val)
goto ignore_entry;
#endif
out:
return 1;
ignore_entry:
return 0;
}
......@@ -883,8 +902,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
struct nf_conn *nf_ct_evict[8];
int res, i;
spinlock_t *lockp;
......@@ -923,11 +940,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
if (!net_eq(net, nf_ct_net(ct)))
continue;
/* Dump entries of a given L3 protocol number.
* If it is not specified, ie. l3proto == 0,
* then dump everything. */
if (l3proto && nf_ct_l3num(ct) != l3proto)
continue;
if (cb->args[1]) {
if (ct != last)
continue;
......@@ -1048,7 +1060,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
rcu_read_lock();
l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
if (likely(l4proto->nlattr_to_tuple)) {
ret = nla_validate_nested(attr, CTA_PROTO_MAX,
......@@ -1213,12 +1225,12 @@ static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
static int ctnetlink_flush_conntrack(struct net *net,
const struct nlattr * const cda[],
u32 portid, int report)
u32 portid, int report, u8 family)
{
struct ctnetlink_filter *filter = NULL;
if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
filter = ctnetlink_alloc_filter(cda);
if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
filter = ctnetlink_alloc_filter(cda, family);
if (IS_ERR(filter))
return PTR_ERR(filter);
}
......@@ -1257,7 +1269,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
else {
return ctnetlink_flush_conntrack(net, cda,
NETLINK_CB(skb).portid,
nlmsg_report(nlh));
nlmsg_report(nlh), u3);
}
if (err < 0)
......@@ -1696,7 +1708,7 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
return err;
rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
if (l4proto->from_nlattr)
err = l4proto->from_nlattr(tb, ct);
rcu_read_unlock();
......@@ -2656,8 +2668,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
rcu_read_lock();
ret = ctnetlink_dump_tuples_ip(skb, &m);
if (ret >= 0) {
l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
tuple->dst.protonum);
l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
}
rcu_read_unlock();
......
......@@ -43,7 +43,7 @@
extern unsigned int nf_conntrack_net_id;
static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
static struct nf_conntrack_l4proto __rcu *nf_ct_protos[MAX_NF_CT_PROTO + 1] __read_mostly;
static DEFINE_MUTEX(nf_ct_proto_mutex);
......@@ -124,23 +124,21 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
#endif
const struct nf_conntrack_l4proto *
__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto)
{
if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
if (unlikely(l4proto >= ARRAY_SIZE(nf_ct_protos)))
return &nf_conntrack_l4proto_generic;
return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
return rcu_dereference(nf_ct_protos[l4proto]);
}
EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
const struct nf_conntrack_l4proto *
nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4num)
{
const struct nf_conntrack_l4proto *p;
rcu_read_lock();
p = __nf_ct_l4proto_find(l3num, l4num);
p = __nf_ct_l4proto_find(l4num);
if (!try_module_get(p->me))
p = &nf_conntrack_l4proto_generic;
rcu_read_unlock();
......@@ -159,8 +157,7 @@ static int kill_l4proto(struct nf_conn *i, void *data)
{
const struct nf_conntrack_l4proto *l4proto;
l4proto = data;
return nf_ct_protonum(i) == l4proto->l4proto &&
nf_ct_l3num(i) == l4proto->l3proto;
return nf_ct_protonum(i) == l4proto->l4proto;
}
static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
......@@ -219,48 +216,20 @@ int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos))
return -EBUSY;
if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) ||
(l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
return -EINVAL;
mutex_lock(&nf_ct_proto_mutex);
if (!nf_ct_protos[l4proto->l3proto]) {
/* l3proto may be loaded latter. */
struct nf_conntrack_l4proto __rcu **proto_array;
int i;
proto_array =
kmalloc_array(MAX_NF_CT_PROTO,
sizeof(struct nf_conntrack_l4proto *),
GFP_KERNEL);
if (proto_array == NULL) {
ret = -ENOMEM;
goto out_unlock;
}
for (i = 0; i < MAX_NF_CT_PROTO; i++)
RCU_INIT_POINTER(proto_array[i],
&nf_conntrack_l4proto_generic);
/* Before making proto_array visible to lockless readers,
* we must make sure its content is committed to memory.
*/
smp_wmb();
nf_ct_protos[l4proto->l3proto] = proto_array;
} else if (rcu_dereference_protected(
nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
if (rcu_dereference_protected(
nf_ct_protos[l4proto->l4proto],
lockdep_is_held(&nf_ct_proto_mutex)
) != &nf_conntrack_l4proto_generic) {
ret = -EBUSY;
goto out_unlock;
}
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
l4proto);
rcu_assign_pointer(nf_ct_protos[l4proto->l4proto], l4proto);
out_unlock:
mutex_unlock(&nf_ct_proto_mutex);
return ret;
......@@ -274,7 +243,7 @@ int nf_ct_l4proto_pernet_register_one(struct net *net,
struct nf_proto_net *pn = NULL;
if (l4proto->init_net) {
ret = l4proto->init_net(net, l4proto->l3proto);
ret = l4proto->init_net(net);
if (ret < 0)
goto out;
}
......@@ -296,13 +265,13 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
BUG_ON(l4proto->l4proto >= ARRAY_SIZE(nf_ct_protos));
BUG_ON(rcu_dereference_protected(
nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
nf_ct_protos[l4proto->l4proto],
lockdep_is_held(&nf_ct_proto_mutex)
) != l4proto);
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
rcu_assign_pointer(nf_ct_protos[l4proto->l4proto],
&nf_conntrack_l4proto_generic);
}
......@@ -352,7 +321,7 @@ static int
nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
unsigned int num_proto)
{
int ret = -EINVAL, ver;
int ret = -EINVAL;
unsigned int i;
for (i = 0; i < num_proto; i++) {
......@@ -361,9 +330,8 @@ nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
break;
}
if (i != num_proto) {
ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
ver, l4proto[i]->l4proto);
pr_err("nf_conntrack: can't register l4 %d proto.\n",
l4proto[i]->l4proto);
nf_ct_l4proto_unregister(l4proto, i);
}
return ret;
......@@ -382,9 +350,8 @@ int nf_ct_l4proto_pernet_register(struct net *net,
break;
}
if (i != num_proto) {
pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
l4proto[i]->l4proto,
l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
pr_err("nf_conntrack %d: pernet registration failed\n",
l4proto[i]->l4proto);
nf_ct_l4proto_pernet_unregister(net, l4proto, i);
}
return ret;
......@@ -455,7 +422,7 @@ static unsigned int ipv4_conntrack_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
return nf_conntrack_in(skb, state);
}
static unsigned int ipv4_conntrack_local(void *priv,
......@@ -477,7 +444,7 @@ static unsigned int ipv4_conntrack_local(void *priv,
return NF_ACCEPT;
}
return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
return nf_conntrack_in(skb, state);
}
/* Connection tracking may drop packets, but never alters them, so
......@@ -690,14 +657,14 @@ static unsigned int ipv6_conntrack_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
return nf_conntrack_in(skb, state);
}
static unsigned int ipv6_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
return nf_conntrack_in(skb, state);
}
static unsigned int ipv6_helper(void *priv,
......@@ -911,37 +878,26 @@ void nf_ct_netns_put(struct net *net, uint8_t nfproto)
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
&nf_conntrack_l4proto_tcp4,
&nf_conntrack_l4proto_udp4,
&nf_conntrack_l4proto_tcp,
&nf_conntrack_l4proto_udp,
&nf_conntrack_l4proto_icmp,
#ifdef CONFIG_NF_CT_PROTO_DCCP
&nf_conntrack_l4proto_dccp4,
&nf_conntrack_l4proto_dccp,
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
&nf_conntrack_l4proto_sctp4,
&nf_conntrack_l4proto_sctp,
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
&nf_conntrack_l4proto_udplite4,
&nf_conntrack_l4proto_udplite,
#endif
#if IS_ENABLED(CONFIG_IPV6)
&nf_conntrack_l4proto_tcp6,
&nf_conntrack_l4proto_udp6,
&nf_conntrack_l4proto_icmpv6,
#ifdef CONFIG_NF_CT_PROTO_DCCP
&nf_conntrack_l4proto_dccp6,
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
&nf_conntrack_l4proto_sctp6,
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
&nf_conntrack_l4proto_udplite6,
#endif
#endif /* CONFIG_IPV6 */
};
int nf_conntrack_proto_init(void)
{
int ret = 0;
int ret = 0, i;
ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0)
......@@ -952,6 +908,11 @@ int nf_conntrack_proto_init(void)
if (ret < 0)
goto cleanup_sockopt;
#endif
for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
RCU_INIT_POINTER(nf_ct_protos[i],
&nf_conntrack_l4proto_generic);
ret = nf_ct_l4proto_register(builtin_l4proto,
ARRAY_SIZE(builtin_l4proto));
if (ret < 0)
......@@ -969,17 +930,10 @@ int nf_conntrack_proto_init(void)
void nf_conntrack_proto_fini(void)
{
unsigned int i;
nf_unregister_sockopt(&so_getorigdst);
#if IS_ENABLED(CONFIG_IPV6)
nf_unregister_sockopt(&so_getorigdst6);
#endif
/* No need to call nf_ct_l4proto_unregister(), the register
* tables are free'd here anyway.
*/
for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
kfree(nf_ct_protos[i]);
}
int nf_conntrack_proto_pernet_init(struct net *net)
......@@ -988,8 +942,7 @@ int nf_conntrack_proto_pernet_init(struct net *net)
struct nf_proto_net *pn = nf_ct_l4proto_net(net,
&nf_conntrack_l4proto_generic);
err = nf_conntrack_l4proto_generic.init_net(net,
nf_conntrack_l4proto_generic.l3proto);
err = nf_conntrack_l4proto_generic.init_net(net);
if (err < 0)
return err;
err = nf_ct_l4proto_register_sysctl(net,
......
......@@ -389,18 +389,15 @@ static inline struct nf_dccp_net *dccp_pernet(struct net *net)
return &net->ct.nf_ct_proto.dccp;
}
static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff)
static noinline bool
dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
const struct dccp_hdr *dh)
{
struct net *net = nf_ct_net(ct);
struct nf_dccp_net *dn;
struct dccp_hdr _dh, *dh;
const char *msg;
u_int8_t state;
dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
BUG_ON(dh == NULL);
state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
switch (state) {
default:
......@@ -438,8 +435,51 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh)
ntohl(dhack->dccph_ack_nr_low);
}
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo)
static bool dccp_error(const struct dccp_hdr *dh,
struct sk_buff *skb, unsigned int dataoff,
const struct nf_hook_state *state)
{
unsigned int dccp_len = skb->len - dataoff;
unsigned int cscov;
const char *msg;
if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
dh->dccph_doff * 4 > dccp_len) {
msg = "nf_ct_dccp: truncated/malformed packet ";
goto out_invalid;
}
cscov = dccp_len;
if (dh->dccph_cscov) {
cscov = (dh->dccph_cscov - 1) * 4;
if (cscov > dccp_len) {
msg = "nf_ct_dccp: bad checksum coverage ";
goto out_invalid;
}
}
if (state->hook == NF_INET_PRE_ROUTING &&
state->net->ct.sysctl_checksum &&
nf_checksum_partial(skb, state->hook, dataoff, cscov,
IPPROTO_DCCP, state->pf)) {
msg = "nf_ct_dccp: bad checksum ";
goto out_invalid;
}
if (dh->dccph_type >= DCCP_PKT_INVALID) {
msg = "nf_ct_dccp: reserved packet type ";
goto out_invalid;
}
return false;
out_invalid:
nf_l4proto_log_invalid(skb, state->net, state->pf,
IPPROTO_DCCP, "%s", msg);
return true;
}
static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
struct dccp_hdr _dh, *dh;
......@@ -448,8 +488,15 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int *timeouts;
dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
BUG_ON(dh == NULL);
if (!dh)
return NF_DROP;
if (dccp_error(dh, skb, dataoff, state))
return -NF_ACCEPT;
type = dh->dccph_type;
if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh))
return -NF_ACCEPT;
if (type == DCCP_PKT_RESET &&
!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
......@@ -527,55 +574,6 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
return NF_ACCEPT;
}
static int dccp_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb, unsigned int dataoff,
u_int8_t pf, unsigned int hooknum)
{
struct dccp_hdr _dh, *dh;
unsigned int dccp_len = skb->len - dataoff;
unsigned int cscov;
const char *msg;
dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
if (dh == NULL) {
msg = "nf_ct_dccp: short packet ";
goto out_invalid;
}
if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
dh->dccph_doff * 4 > dccp_len) {
msg = "nf_ct_dccp: truncated/malformed packet ";
goto out_invalid;
}
cscov = dccp_len;
if (dh->dccph_cscov) {
cscov = (dh->dccph_cscov - 1) * 4;
if (cscov > dccp_len) {
msg = "nf_ct_dccp: bad checksum coverage ";
goto out_invalid;
}
}
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP,
pf)) {
msg = "nf_ct_dccp: bad checksum ";
goto out_invalid;
}
if (dh->dccph_type >= DCCP_PKT_INVALID) {
msg = "nf_ct_dccp: reserved packet type ";
goto out_invalid;
}
return NF_ACCEPT;
out_invalid:
nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg);
return -NF_ACCEPT;
}
static bool dccp_can_early_drop(const struct nf_conn *ct)
{
switch (ct->proto.dccp.state) {
......@@ -814,7 +812,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
return 0;
}
static int dccp_init_net(struct net *net, u_int16_t proto)
static int dccp_init_net(struct net *net)
{
struct nf_dccp_net *dn = dccp_pernet(net);
struct nf_proto_net *pn = &dn->pn;
......@@ -844,45 +842,9 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.dccp.pn;
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
.new = dccp_new,
.packet = dccp_packet,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_size = DCCP_NLATTR_SIZE,
.to_nlattr = dccp_to_nlattr,
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
.ctnl_timeout = {
.nlattr_to_obj = dccp_timeout_nlattr_to_obj,
.obj_to_nlattr = dccp_timeout_obj_to_nlattr,
.nlattr_max = CTA_TIMEOUT_DCCP_MAX,
.obj_size = sizeof(unsigned int) * CT_DCCP_MAX,
.nla_policy = dccp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
.init_net = dccp_init_net,
.get_net_proto = dccp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
.l3proto = AF_INET6,
const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
.l4proto = IPPROTO_DCCP,
.new = dccp_new,
.packet = dccp_packet,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
......@@ -908,4 +870,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
.init_net = dccp_init_net,
.get_net_proto = dccp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6);
......@@ -44,12 +44,19 @@ static bool generic_pkt_to_tuple(const struct sk_buff *skb,
/* Returns verdict for packet, or -1 for invalid. */
static int generic_packet(struct nf_conn *ct,
const struct sk_buff *skb,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo)
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
const unsigned int *timeout = nf_ct_timeout_lookup(ct);
if (!nf_generic_should_process(nf_ct_protonum(ct))) {
pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
nf_ct_protonum(ct));
return -NF_ACCEPT;
}
if (!timeout)
timeout = &generic_pernet(nf_ct_net(ct))->timeout;
......@@ -57,19 +64,6 @@ static int generic_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff)
{
bool ret;
ret = nf_generic_should_process(nf_ct_protonum(ct));
if (!ret)
pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
nf_ct_protonum(ct));
return ret;
}
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
#include <linux/netfilter/nfnetlink.h>
......@@ -142,7 +136,7 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
static int generic_init_net(struct net *net, u_int16_t proto)
static int generic_init_net(struct net *net)
{
struct nf_generic_net *gn = generic_pernet(net);
struct nf_proto_net *pn = &gn->pn;
......@@ -159,11 +153,9 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
{
.l3proto = PF_UNSPEC,
.l4proto = 255,
.pkt_to_tuple = generic_pkt_to_tuple,
.packet = generic_packet,
.new = generic_new,
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
.ctnl_timeout = {
.nlattr_to_obj = generic_timeout_nlattr_to_obj,
......
......@@ -233,10 +233,26 @@ static unsigned int *gre_get_timeouts(struct net *net)
/* Returns verdict for packet, and may modify conntrack */
static int gre_packet(struct nf_conn *ct,
const struct sk_buff *skb,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo)
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
if (state->pf != NFPROTO_IPV4)
return -NF_ACCEPT;
if (!nf_ct_is_confirmed(ct)) {
unsigned int *timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = gre_get_timeouts(nf_ct_net(ct));
/* initialize to sane value. Ideally a conntrack helper
* (e.g. in case of pptp) is increasing them */
ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED];
ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
}
/* If we've seen traffic both ways, this is a GRE connection.
* Extend timeout. */
if (ct->status & IPS_SEEN_REPLY) {
......@@ -252,26 +268,6 @@ static int gre_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff)
{
unsigned int *timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = gre_get_timeouts(nf_ct_net(ct));
pr_debug(": ");
nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
/* initialize to sane value. Ideally a conntrack helper
* (e.g. in case of pptp) is increasing them */
ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED];
ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
return true;
}
/* Called when a conntrack entry has already been removed from the hashes
* and is about to be deleted from memory */
static void gre_destroy(struct nf_conn *ct)
......@@ -336,7 +332,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
static int gre_init_net(struct net *net, u_int16_t proto)
static int gre_init_net(struct net *net)
{
struct netns_proto_gre *net_gre = gre_pernet(net);
int i;
......@@ -351,14 +347,12 @@ static int gre_init_net(struct net *net, u_int16_t proto)
/* protocol helper struct */
static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
.pkt_to_tuple = gre_pkt_to_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
#endif
.packet = gre_packet,
.new = gre_new,
.destroy = gre_destroy,
.me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
......
......@@ -72,34 +72,17 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
static unsigned int *icmp_get_timeouts(struct net *net)
{
return &icmp_pernet(net)->timeout;
}
/* Returns verdict for packet, or -1 for invalid. */
static int icmp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo)
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
/* Do not immediately delete the connection after the first
successful reply to avoid excessive conntrackd traffic
and also to handle correctly ICMP echo reply duplicates. */
unsigned int *timeout = nf_ct_timeout_lookup(ct);
if (!timeout)
timeout = icmp_get_timeouts(nf_ct_net(ct));
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff)
{
static const u_int8_t valid_new[] = {
[ICMP_ECHO] = 1,
[ICMP_TIMESTAMP] = 1,
......@@ -107,21 +90,29 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
[ICMP_ADDRESS] = 1
};
if (state->pf != NFPROTO_IPV4)
return -NF_ACCEPT;
if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
!valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
/* Can't create a new ICMP `conn' with this. */
pr_debug("icmp: can't create new conn with type %u\n",
ct->tuplehash[0].tuple.dst.u.icmp.type);
nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
return false;
return -NF_ACCEPT;
}
return true;
if (!timeout)
timeout = &icmp_pernet(nf_ct_net(ct))->timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
}
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
static int
icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
unsigned int hooknum)
icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conntrack_tuple innertuple, origtuple;
const struct nf_conntrack_l4proto *innerproto;
......@@ -137,13 +128,13 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
if (!nf_ct_get_tuplepr(skb,
skb_network_offset(skb) + ip_hdrlen(skb)
+ sizeof(struct icmphdr),
PF_INET, net, &origtuple)) {
PF_INET, state->net, &origtuple)) {
pr_debug("icmp_error_message: failed to get tuple\n");
return -NF_ACCEPT;
}
/* rcu_read_lock()ed by nf_hook_thresh */
innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
innerproto = __nf_ct_l4proto_find(origtuple.dst.protonum);
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
......@@ -154,7 +145,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
ctinfo = IP_CT_RELATED;
h = nf_conntrack_find_get(net, zone, &innertuple);
h = nf_conntrack_find_get(state->net, zone, &innertuple);
if (!h) {
pr_debug("icmp_error_message: no match\n");
return -NF_ACCEPT;
......@@ -168,17 +159,18 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
return NF_ACCEPT;
}
static void icmp_error_log(const struct sk_buff *skb, struct net *net,
u8 pf, const char *msg)
static void icmp_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
nf_l4proto_log_invalid(skb, state->net, state->pf,
IPPROTO_ICMP, "%s", msg);
}
/* Small and modified version of icmp_rcv */
static int
icmp_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb, unsigned int dataoff,
u8 pf, unsigned int hooknum)
int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
struct sk_buff *skb, unsigned int dataoff,
const struct nf_hook_state *state)
{
const struct icmphdr *icmph;
struct icmphdr _ih;
......@@ -186,14 +178,15 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
/* Not enough header? */
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
icmp_error_log(skb, net, pf, "short packet");
icmp_error_log(skb, state, "short packet");
return -NF_ACCEPT;
}
/* See ip_conntrack_proto_tcp.c */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
icmp_error_log(skb, net, pf, "bad hw icmp checksum");
if (state->net->ct.sysctl_checksum &&
state->hook == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, state->hook, dataoff, 0)) {
icmp_error_log(skb, state, "bad hw icmp checksum");
return -NF_ACCEPT;
}
......@@ -204,7 +197,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
* discarded.
*/
if (icmph->type > NR_ICMP_TYPES) {
icmp_error_log(skb, net, pf, "invalid icmp type");
icmp_error_log(skb, state, "invalid icmp type");
return -NF_ACCEPT;
}
......@@ -216,7 +209,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
icmph->type != ICMP_REDIRECT)
return NF_ACCEPT;
return icmp_error_message(net, tmpl, skb, hooknum);
return icmp_error_message(tmpl, skb, state);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
......@@ -342,7 +335,7 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
static int icmp_init_net(struct net *net, u_int16_t proto)
static int icmp_init_net(struct net *net)
{
struct nf_icmp_net *in = icmp_pernet(net);
struct nf_proto_net *pn = &in->pn;
......@@ -359,13 +352,10 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_ICMP,
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
.packet = icmp_packet,
.new = icmp_new,
.error = icmp_error,
.destroy = NULL,
.me = NULL,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
......
......@@ -92,11 +92,31 @@ static unsigned int *icmpv6_get_timeouts(struct net *net)
/* Returns verdict for packet, or -1 for invalid. */
static int icmpv6_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo)
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
unsigned int *timeout = nf_ct_timeout_lookup(ct);
static const u8 valid_new[] = {
[ICMPV6_ECHO_REQUEST - 128] = 1,
[ICMPV6_NI_QUERY - 128] = 1
};
if (state->pf != NFPROTO_IPV6)
return -NF_ACCEPT;
if (!nf_ct_is_confirmed(ct)) {
int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
/* Can't create a new ICMPv6 `conn' with this. */
pr_debug("icmpv6: can't create new conn with type %u\n",
type + 128);
nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
return -NF_ACCEPT;
}
}
if (!timeout)
timeout = icmpv6_get_timeouts(nf_ct_net(ct));
......@@ -109,26 +129,6 @@ static int icmpv6_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff)
{
static const u_int8_t valid_new[] = {
[ICMPV6_ECHO_REQUEST - 128] = 1,
[ICMPV6_NI_QUERY - 128] = 1
};
int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
/* Can't create a new ICMPv6 `conn' with this. */
pr_debug("icmpv6: can't create new conn with type %u\n",
type + 128);
nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
return false;
}
return true;
}
static int
icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
......@@ -153,7 +153,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
}
/* rcu_read_lock()ed by nf_hook_thresh */
inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
inproto = __nf_ct_l4proto_find(origtuple.dst.protonum);
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
......@@ -179,16 +179,18 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
return NF_ACCEPT;
}
static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
u8 pf, const char *msg)
static void icmpv6_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
nf_l4proto_log_invalid(skb, state->net, state->pf,
IPPROTO_ICMPV6, "%s", msg);
}
static int
icmpv6_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb, unsigned int dataoff,
u8 pf, unsigned int hooknum)
int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
const struct nf_hook_state *state)
{
const struct icmp6hdr *icmp6h;
struct icmp6hdr _ih;
......@@ -196,13 +198,14 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmp6h == NULL) {
icmpv6_error_log(skb, net, pf, "short packet");
icmpv6_error_log(skb, state, "short packet");
return -NF_ACCEPT;
}
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
if (state->hook == NF_INET_PRE_ROUTING &&
state->net->ct.sysctl_checksum &&
nf_ip6_checksum(skb, state->hook, dataoff, IPPROTO_ICMPV6)) {
icmpv6_error_log(skb, state, "ICMPv6 checksum failed");
return -NF_ACCEPT;
}
......@@ -217,7 +220,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
if (icmp6h->icmp6_type >= 128)
return NF_ACCEPT;
return icmpv6_error_message(net, tmpl, skb, dataoff);
return icmpv6_error_message(state->net, tmpl, skb, dataoff);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
......@@ -343,7 +346,7 @@ static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
static int icmpv6_init_net(struct net *net, u_int16_t proto)
static int icmpv6_init_net(struct net *net)
{
struct nf_icmp_net *in = icmpv6_pernet(net);
struct nf_proto_net *pn = &in->pn;
......@@ -360,13 +363,10 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_ICMPV6,
.pkt_to_tuple = icmpv6_pkt_to_tuple,
.invert_tuple = icmpv6_invert_tuple,
.packet = icmpv6_packet,
.new = icmpv6_new,
.error = icmpv6_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = icmpv6_tuple_to_nlattr,
.nlattr_tuple_size = icmpv6_nlattr_tuple_size,
......
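The icmpv6 hunks above show the shape every tracker converges on once
->new() and ->error() fold into ->packet(). A sketch of that skeleton,
assuming hypothetical per-protocol helpers proto_error(), proto_new_ok()
and proto_get_timeouts() standing in for e.g. icmpv6's inline checks:

	static int proto_packet(struct nf_conn *ct, struct sk_buff *skb,
				unsigned int dataoff,
				enum ip_conntrack_info ctinfo,
				const struct nf_hook_state *state)
	{
		unsigned int *timeout;

		if (proto_error(skb, dataoff, state))	/* former ->error() */
			return -NF_ACCEPT;

		if (!nf_ct_is_confirmed(ct) &&
		    !proto_new_ok(ct, skb, dataoff))	/* former ->new() */
			return -NF_ACCEPT;

		timeout = nf_ct_timeout_lookup(ct);
		if (!timeout)
			timeout = proto_get_timeouts(nf_ct_net(ct));

		nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
		return NF_ACCEPT;
	}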
......@@ -273,11 +273,100 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
return sctp_conntracks[dir][i][cur_state];
}
/* Don't need lock here: this conntrack not in circulation yet */
static noinline bool
sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
const struct sctphdr *sh, unsigned int dataoff)
{
enum sctp_conntrack new_state;
const struct sctp_chunkhdr *sch;
struct sctp_chunkhdr _sch;
u32 offset, count;
memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
new_state = SCTP_CONNTRACK_MAX;
for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) {
new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
SCTP_CONNTRACK_NONE, sch->type);
/* Invalid: delete conntrack */
if (new_state == SCTP_CONNTRACK_NONE ||
new_state == SCTP_CONNTRACK_MAX) {
pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
return false;
}
/* Copy the vtag into the state info */
if (sch->type == SCTP_CID_INIT) {
struct sctp_inithdr _inithdr, *ih;
/* Sec 8.5.1 (A) */
if (sh->vtag)
return false;
ih = skb_header_pointer(skb, offset + sizeof(_sch),
sizeof(_inithdr), &_inithdr);
if (!ih)
return false;
pr_debug("Setting vtag %x for new conn\n",
ih->init_tag);
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
} else {
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
pr_debug("Setting vtag %x for new conn OOTB\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
}
ct->proto.sctp.state = new_state;
}
return true;
}
static bool sctp_error(struct sk_buff *skb,
unsigned int dataoff,
const struct nf_hook_state *state)
{
const struct sctphdr *sh;
const char *logmsg;
if (skb->len < dataoff + sizeof(struct sctphdr)) {
logmsg = "nf_ct_sctp: short packet ";
goto out_invalid;
}
if (state->hook == NF_INET_PRE_ROUTING &&
state->net->ct.sysctl_checksum &&
skb->ip_summed == CHECKSUM_NONE) {
if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
logmsg = "nf_ct_sctp: failed to read header ";
goto out_invalid;
}
sh = (const struct sctphdr *)(skb->data + dataoff);
if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
logmsg = "nf_ct_sctp: bad CRC ";
goto out_invalid;
}
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
return false;
out_invalid:
nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_SCTP, "%s", logmsg);
return true;
}
/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
static int sctp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo)
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
enum sctp_conntrack new_state, old_state;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
......@@ -289,6 +378,9 @@ static int sctp_packet(struct nf_conn *ct,
unsigned int *timeouts;
unsigned long map[256 / sizeof(unsigned long)] = { 0 };
if (sctp_error(skb, dataoff, state))
return -NF_ACCEPT;
sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
if (sh == NULL)
goto out;
......@@ -296,6 +388,17 @@ static int sctp_packet(struct nf_conn *ct,
if (do_basic_checks(ct, skb, dataoff, map) != 0)
goto out;
if (!nf_ct_is_confirmed(ct)) {
/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
if (test_bit(SCTP_CID_ABORT, map) ||
test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
test_bit(SCTP_CID_COOKIE_ACK, map))
return -NF_ACCEPT;
if (!sctp_new(ct, skb, sh, dataoff))
return -NF_ACCEPT;
}
/* Check the verification tag (Sec 8.5) */
if (!test_bit(SCTP_CID_INIT, map) &&
!test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
......@@ -397,110 +500,6 @@ static int sctp_packet(struct nf_conn *ct,
return -NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff)
{
enum sctp_conntrack new_state;
const struct sctphdr *sh;
struct sctphdr _sctph;
const struct sctp_chunkhdr *sch;
struct sctp_chunkhdr _sch;
u_int32_t offset, count;
unsigned long map[256 / sizeof(unsigned long)] = { 0 };
sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
if (sh == NULL)
return false;
if (do_basic_checks(ct, skb, dataoff, map) != 0)
return false;
/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
if (test_bit(SCTP_CID_ABORT, map) ||
test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
test_bit(SCTP_CID_COOKIE_ACK, map))
return false;
memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
new_state = SCTP_CONNTRACK_MAX;
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Don't need lock here: this conntrack not in circulation yet */
new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
SCTP_CONNTRACK_NONE, sch->type);
/* Invalid: delete conntrack */
if (new_state == SCTP_CONNTRACK_NONE ||
new_state == SCTP_CONNTRACK_MAX) {
pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
return false;
}
/* Copy the vtag into the state info */
if (sch->type == SCTP_CID_INIT) {
struct sctp_inithdr _inithdr, *ih;
/* Sec 8.5.1 (A) */
if (sh->vtag)
return false;
ih = skb_header_pointer(skb, offset + sizeof(_sch),
sizeof(_inithdr), &_inithdr);
if (!ih)
return false;
pr_debug("Setting vtag %x for new conn\n",
ih->init_tag);
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
}
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
else {
pr_debug("Setting vtag %x for new conn OOTB\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
}
ct->proto.sctp.state = new_state;
}
return true;
}
static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
unsigned int dataoff,
u8 pf, unsigned int hooknum)
{
const struct sctphdr *sh;
const char *logmsg;
if (skb->len < dataoff + sizeof(struct sctphdr)) {
logmsg = "nf_ct_sctp: short packet ";
goto out_invalid;
}
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
skb->ip_summed == CHECKSUM_NONE) {
if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
logmsg = "nf_ct_sctp: failed to read header ";
goto out_invalid;
}
sh = (const struct sctphdr *)(skb->data + dataoff);
if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
logmsg = "nf_ct_sctp: bad CRC ";
goto out_invalid;
}
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
return NF_ACCEPT;
out_invalid:
nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg);
return -NF_ACCEPT;
}
static bool sctp_can_early_drop(const struct nf_conn *ct)
{
switch (ct->proto.sctp.state) {
......@@ -735,7 +734,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
static int sctp_init_net(struct net *net, u_int16_t proto)
static int sctp_init_net(struct net *net)
{
struct nf_sctp_net *sn = sctp_pernet(net);
struct nf_proto_net *pn = &sn->pn;
......@@ -760,49 +759,12 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.sctp.pn;
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
#endif
.packet = sctp_packet,
.new = sctp_new,
.error = sctp_error,
.can_early_drop = sctp_can_early_drop,
.me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_size = SCTP_NLATTR_SIZE,
.to_nlattr = sctp_to_nlattr,
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
.ctnl_timeout = {
.nlattr_to_obj = sctp_timeout_nlattr_to_obj,
.obj_to_nlattr = sctp_timeout_obj_to_nlattr,
.nlattr_max = CTA_TIMEOUT_SCTP_MAX,
.obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX,
.nla_policy = sctp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
.init_net = sctp_init_net,
.get_net_proto = sctp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
.l3proto = PF_INET6,
const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = {
.l4proto = IPPROTO_SCTP,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
#endif
.packet = sctp_packet,
.new = sctp_new,
.error = sctp_error,
.can_early_drop = sctp_can_early_drop,
.me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
......@@ -826,4 +788,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
.init_net = sctp_init_net,
.get_net_proto = sctp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp6);
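sctp_new() now only runs from ->packet(), and only for unconfirmed
conntracks. Its vtag seeding reduces to a single decision on the first
chunk type seen; an illustrative reduction (sctp_seed_vtag() is a
hypothetical helper, not in this patch):

	static bool sctp_seed_vtag(struct nf_conn *ct, const struct sctphdr *sh,
				   const struct sctp_inithdr *ih, u8 chunk_type)
	{
		switch (chunk_type) {
		case SCTP_CID_INIT:	/* Sec 8.5.1 (A): INIT carries vtag 0 */
			if (sh->vtag)
				return false;
			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
			break;
		case SCTP_CID_HEARTBEAT:	/* secondary path probe */
			ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
			break;
		default:	/* OOTB: expect SHUTDOWN COMPLETE or ABORT */
			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
			break;
		}
		return true;
	}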
......@@ -42,14 +42,65 @@ static unsigned int *udp_get_timeouts(struct net *net)
return udp_pernet(net)->timeouts;
}
static void udp_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
nf_l4proto_log_invalid(skb, state->net, state->pf,
IPPROTO_UDP, "%s", msg);
}
static bool udp_error(struct sk_buff *skb,
unsigned int dataoff,
const struct nf_hook_state *state)
{
unsigned int udplen = skb->len - dataoff;
const struct udphdr *hdr;
struct udphdr _hdr;
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (!hdr) {
udp_error_log(skb, state, "short packet");
return true;
}
/* Truncated/malformed packets */
if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
udp_error_log(skb, state, "truncated/malformed packet");
return true;
}
/* Packet with no checksum */
if (!hdr->check)
return false;
/* Checksum invalid? Ignore.
* We skip checking packets on the outgoing path
* because the checksum is assumed to be correct.
* FIXME: Source route IP option packets --RR */
if (state->hook == NF_INET_PRE_ROUTING &&
state->net->ct.sysctl_checksum &&
nf_checksum(skb, state->hook, dataoff, IPPROTO_UDP, state->pf)) {
udp_error_log(skb, state, "bad checksum");
return true;
}
return false;
}
/* Returns verdict for packet, and may modify conntracktype */
static int udp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo)
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
unsigned int *timeouts;
if (udp_error(skb, dataoff, state))
return -NF_ACCEPT;
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = udp_get_timeouts(nf_ct_net(ct));
......@@ -69,24 +120,18 @@ static int udp_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff)
{
return true;
}
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
static void udplite_error_log(const struct sk_buff *skb, struct net *net,
u8 pf, const char *msg)
static void udplite_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg);
nf_l4proto_log_invalid(skb, state->net, state->pf,
IPPROTO_UDPLITE, "%s", msg);
}
static int udplite_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
u8 pf, unsigned int hooknum)
static bool udplite_error(struct sk_buff *skb,
unsigned int dataoff,
const struct nf_hook_state *state)
{
unsigned int udplen = skb->len - dataoff;
const struct udphdr *hdr;
......@@ -96,80 +141,67 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (!hdr) {
udplite_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
udplite_error_log(skb, state, "short packet");
return true;
}
cscov = ntohs(hdr->len);
if (cscov == 0) {
cscov = udplen;
} else if (cscov < sizeof(*hdr) || cscov > udplen) {
udplite_error_log(skb, net, pf, "invalid checksum coverage");
return -NF_ACCEPT;
udplite_error_log(skb, state, "invalid checksum coverage");
return true;
}
/* UDPLITE mandates checksums */
if (!hdr->check) {
udplite_error_log(skb, net, pf, "checksum missing");
return -NF_ACCEPT;
udplite_error_log(skb, state, "checksum missing");
return true;
}
/* Checksum invalid? Ignore. */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
pf)) {
udplite_error_log(skb, net, pf, "bad checksum");
return -NF_ACCEPT;
if (state->hook == NF_INET_PRE_ROUTING &&
state->net->ct.sysctl_checksum &&
nf_checksum_partial(skb, state->hook, dataoff, cscov, IPPROTO_UDP,
state->pf)) {
udplite_error_log(skb, state, "bad checksum");
return true;
}
return NF_ACCEPT;
}
#endif
static void udp_error_log(const struct sk_buff *skb, struct net *net,
u8 pf, const char *msg)
{
nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg);
return false;
}
static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
unsigned int dataoff,
u_int8_t pf,
unsigned int hooknum)
/* Returns verdict for packet, and may modify conntracktype */
static int udplite_packet(struct nf_conn *ct,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
unsigned int udplen = skb->len - dataoff;
const struct udphdr *hdr;
struct udphdr _hdr;
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
udp_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
unsigned int *timeouts;
/* Truncated/malformed packets */
if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
udp_error_log(skb, net, pf, "truncated/malformed packet");
if (udplite_error(skb, dataoff, state))
return -NF_ACCEPT;
}
/* Packet with no checksum */
if (!hdr->check)
return NF_ACCEPT;
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = udp_get_timeouts(nf_ct_net(ct));
/* Checksum invalid? Ignore.
* We skip checking packets on the outgoing path
* because the checksum is assumed to be correct.
* FIXME: Source route IP option packets --RR */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
udp_error_log(skb, net, pf, "bad checksum");
return -NF_ACCEPT;
/* If we've seen traffic both ways, this is some kind of UDP
stream. Extend timeout. */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
nf_ct_refresh_acct(ct, ctinfo, skb,
timeouts[UDP_CT_REPLIED]);
/* Also, more likely to be important, and not a probe */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else {
nf_ct_refresh_acct(ct, ctinfo, skb,
timeouts[UDP_CT_UNREPLIED]);
}
return NF_ACCEPT;
}
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
......@@ -258,7 +290,7 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
static int udp_init_net(struct net *net, u_int16_t proto)
static int udp_init_net(struct net *net)
{
struct nf_udp_net *un = udp_pernet(net);
struct nf_proto_net *pn = &un->pn;
......@@ -278,72 +310,11 @@ static struct nf_proto_net *udp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.udp.pn;
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
.allow_clash = true,
.packet = udp_packet,
.new = udp_new,
.error = udp_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
.ctnl_timeout = {
.nlattr_to_obj = udp_timeout_nlattr_to_obj,
.obj_to_nlattr = udp_timeout_obj_to_nlattr,
.nlattr_max = CTA_TIMEOUT_UDP_MAX,
.obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
.nla_policy = udp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
.init_net = udp_init_net,
.get_net_proto = udp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4);
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
.allow_clash = true,
.packet = udp_packet,
.new = udp_new,
.error = udplite_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
.ctnl_timeout = {
.nlattr_to_obj = udp_timeout_nlattr_to_obj,
.obj_to_nlattr = udp_timeout_obj_to_nlattr,
.nlattr_max = CTA_TIMEOUT_UDP_MAX,
.obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
.nla_policy = udp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
.init_net = udp_init_net,
.get_net_proto = udp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4);
#endif
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
.allow_clash = true,
.packet = udp_packet,
.new = udp_new,
.error = udp_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
......@@ -362,17 +333,13 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
.init_net = udp_init_net,
.get_net_proto = udp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6);
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
.allow_clash = true,
.packet = udp_packet,
.new = udp_new,
.error = udplite_error,
.packet = udplite_packet,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
......@@ -391,5 +358,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
.init_net = udp_init_net,
.get_net_proto = udp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
#endif
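udplite_error() enforces the RFC 3828 checksum-coverage rules: zero means
"cover the whole datagram", anything else must span at least the header
and at most the datagram. Restated as a standalone plain-C predicate
(illustrative only, not kernel code):

	#include <stdbool.h>

	#define UDPLITE_HDR_LEN 8	/* sizeof(struct udphdr) */

	static bool udplite_cscov_ok(unsigned int cscov, unsigned int udplen)
	{
		if (cscov == 0)
			return true;	/* checksum covers everything */
		return cscov >= UDPLITE_HDR_LEN && cscov <= udplen;
	}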
......@@ -292,7 +292,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (!net_eq(nf_ct_net(ct), net))
goto release;
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
WARN_ON(!l4proto);
ret = -ENOSPC;
......@@ -720,10 +720,3 @@ static void __exit nf_conntrack_standalone_fini(void)
module_init(nf_conntrack_standalone_init);
module_exit(nf_conntrack_standalone_fini);
/* Some modules need us, but don't depend directly on any symbol.
They should call this. */
void need_conntrack(void)
{
}
EXPORT_SYMBOL_GPL(need_conntrack);
......@@ -120,7 +120,7 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
if (l4num == IPPROTO_TCP)
flow_offload_fixup_tcp(&ct->proto.tcp);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
l4proto = __nf_ct_l4proto_find(l4num);
if (!l4proto)
return;
......@@ -233,8 +233,8 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload *flow;
int dir;
tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
nf_flow_offload_rhash_params);
tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
nf_flow_offload_rhash_params);
if (!tuplehash)
return NULL;
......@@ -254,20 +254,17 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
struct flow_offload_tuple_rhash *tuplehash;
struct rhashtable_iter hti;
struct flow_offload *flow;
int err;
err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
if (err)
return err;
int err = 0;
rhashtable_walk_enter(&flow_table->rhashtable, &hti);
rhashtable_walk_start(&hti);
while ((tuplehash = rhashtable_walk_next(&hti))) {
if (IS_ERR(tuplehash)) {
err = PTR_ERR(tuplehash);
if (err != -EAGAIN)
goto out;
if (PTR_ERR(tuplehash) != -EAGAIN) {
err = PTR_ERR(tuplehash);
break;
}
continue;
}
if (tuplehash->tuple.dir)
......@@ -277,7 +274,6 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
iter(flow, data);
}
out:
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
......@@ -290,25 +286,19 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}
static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
{
struct flow_offload_tuple_rhash *tuplehash;
struct rhashtable_iter hti;
struct flow_offload *flow;
int err;
err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
if (err)
return 0;
rhashtable_walk_enter(&flow_table->rhashtable, &hti);
rhashtable_walk_start(&hti);
while ((tuplehash = rhashtable_walk_next(&hti))) {
if (IS_ERR(tuplehash)) {
err = PTR_ERR(tuplehash);
if (err != -EAGAIN)
goto out;
if (PTR_ERR(tuplehash) != -EAGAIN)
break;
continue;
}
if (tuplehash->tuple.dir)
......@@ -321,11 +311,8 @@ static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
FLOW_OFFLOAD_TEARDOWN)))
flow_offload_del(flow_table, flow);
}
out:
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
return 1;
}
static void nf_flow_offload_work_gc(struct work_struct *work)
......@@ -514,7 +501,7 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
mutex_unlock(&flowtable_lock);
cancel_delayed_work_sync(&flow_table->gc_work);
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
WARN_ON(!nf_flow_offload_gc_step(flow_table));
nf_flow_offload_gc_step(flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
......
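Both walkers above now follow the same sequence: rhashtable_walk_enter()
cannot fail, so the GFP_KERNEL allocation and its error leg disappear, and
-EAGAIN from rhashtable_walk_next() merely signals a concurrent resize.
The canonical loop, with ht standing for whichever table is being walked:

	struct rhashtable_iter hti;
	void *obj;

	rhashtable_walk_enter(&ht, &hti);
	rhashtable_walk_start(&hti);

	while ((obj = rhashtable_walk_next(&hti))) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) != -EAGAIN)
				break;		/* real error */
			continue;		/* table resized, resume */
		}
		/* ... process obj ... */
	}

	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);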
......@@ -254,8 +254,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
return NF_ACCEPT;
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
return NF_DROP;
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
......@@ -471,8 +470,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (skb_try_make_writable(skb, sizeof(*ip6h)))
return NF_DROP;
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
nf_flow_nat_ipv6(flow, skb, dir) < 0)
if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
return NF_DROP;
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
......
......@@ -37,7 +37,7 @@ static void mangle_contents(struct sk_buff *skb,
{
unsigned char *data;
BUG_ON(skb_is_nonlinear(skb));
SKB_LINEAR_ASSERT(skb);
data = skb_network_header(skb) + dataoff;
/* move post-replacement */
......@@ -110,8 +110,6 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
!enlarge_skb(skb, rep_len - match_len))
return false;
SKB_LINEAR_ASSERT(skb);
tcph = (void *)skb->data + protoff;
oldlen = skb->len - protoff;
......
......@@ -52,13 +52,11 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
newdst = 0;
rcu_read_lock();
indev = __in_dev_get_rcu(skb->dev);
if (indev && indev->ifa_list) {
ifa = indev->ifa_list;
newdst = ifa->ifa_local;
}
rcu_read_unlock();
if (!newdst)
return NF_DROP;
......@@ -97,7 +95,6 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
struct inet6_ifaddr *ifa;
bool addr = false;
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
if (idev != NULL) {
read_lock_bh(&idev->lock);
......@@ -108,7 +105,6 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
}
read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
if (!addr)
return NF_DROP;
......
......@@ -27,6 +27,8 @@
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
static u64 table_handle;
enum {
......@@ -64,6 +66,8 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state)
net->nft.validate_state = new_validate_state;
}
static void nf_tables_trans_destroy_work(struct work_struct *w);
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
......@@ -207,6 +211,18 @@ static int nft_delchain(struct nft_ctx *ctx)
return err;
}
/* either expr ops provide both activate/deactivate, or neither */
static bool nft_expr_check_ops(const struct nft_expr_ops *ops)
{
if (!ops)
return true;
if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate)))
return false;
return true;
}
static void nft_rule_expr_activate(const struct nft_ctx *ctx,
struct nft_rule *rule)
{
......@@ -298,7 +314,7 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
return 0;
}
static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
struct nft_set *set)
{
struct nft_trans *trans;
......@@ -318,7 +334,7 @@ static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
return 0;
}
static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
......@@ -1005,7 +1021,8 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
BUG_ON(ctx->table->use > 0);
if (WARN_ON(ctx->table->use > 0))
return;
rhltable_destroy(&ctx->table->chains_ht);
kfree(ctx->table->name);
......@@ -1412,7 +1429,8 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
{
struct nft_chain *chain = ctx->chain;
BUG_ON(chain->use > 0);
if (WARN_ON(chain->use > 0))
return;
/* no concurrent access possible anymore */
nf_tables_chain_free_chain_rules(chain);
......@@ -1907,6 +1925,9 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
*/
int nft_register_expr(struct nft_expr_type *type)
{
if (!nft_expr_check_ops(type->ops))
return -EINVAL;
nfnl_lock(NFNL_SUBSYS_NFTABLES);
if (type->family == NFPROTO_UNSPEC)
list_add_tail_rcu(&type->list, &nf_tables_expressions);
......@@ -2054,6 +2075,10 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
err = PTR_ERR(ops);
goto err1;
}
if (!nft_expr_check_ops(ops)) {
err = -EINVAL;
goto err1;
}
} else
ops = type->ops;
......@@ -2434,7 +2459,6 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
{
struct nft_expr *expr;
lockdep_assert_held(&ctx->net->nft.commit_mutex);
/*
* Careful: some expressions might not be initialized in case this
* is called on error from nf_tables_newrule().
......@@ -3567,13 +3591,6 @@ static void nft_set_destroy(struct nft_set *set)
kvfree(set);
}
static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
list_del_rcu(&set->list);
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
nft_set_destroy(set);
}
static int nf_tables_delset(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
......@@ -3668,17 +3685,38 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
EXPORT_SYMBOL_GPL(nf_tables_bind_set);
void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding)
{
if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
nft_is_active(ctx->net, set))
list_add_tail_rcu(&set->list, &ctx->table->sets);
list_add_tail_rcu(&binding->list, &set->bindings);
}
EXPORT_SYMBOL_GPL(nf_tables_rebind_set);
void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding)
{
list_del_rcu(&binding->list);
if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
nft_is_active(ctx->net, set))
nf_tables_set_destroy(ctx, set);
list_del_rcu(&set->list);
}
EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
{
if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
nft_is_active(ctx->net, set)) {
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
nft_set_destroy(set);
}
}
EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
const struct nft_set_ext_type nft_set_ext_types[] = {
[NFT_SET_EXT_KEY] = {
.align = __alignof__(u32),
......@@ -6191,19 +6229,28 @@ static void nft_commit_release(struct nft_trans *trans)
nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
break;
}
if (trans->put_net)
put_net(trans->ctx.net);
kfree(trans);
}
static void nf_tables_commit_release(struct net *net)
static void nf_tables_trans_destroy_work(struct work_struct *w)
{
struct nft_trans *trans, *next;
LIST_HEAD(head);
if (list_empty(&net->nft.commit_list))
spin_lock(&nf_tables_destroy_list_lock);
list_splice_init(&nf_tables_destroy_list, &head);
spin_unlock(&nf_tables_destroy_list_lock);
if (list_empty(&head))
return;
synchronize_rcu();
list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
list_for_each_entry_safe(trans, next, &head, list) {
list_del(&trans->list);
nft_commit_release(trans);
}
......@@ -6334,6 +6381,37 @@ static void nft_chain_del(struct nft_chain *chain)
list_del_rcu(&chain->list);
}
static void nf_tables_commit_release(struct net *net)
{
struct nft_trans *trans;
/* all side effects have to be made visible.
* For example, if a chain named 'foo' has been deleted, a
* new transaction must not find it anymore.
*
* Memory reclaim happens asynchronously from work queue
* to prevent expensive synchronize_rcu() in commit phase.
*/
if (list_empty(&net->nft.commit_list)) {
mutex_unlock(&net->nft.commit_mutex);
return;
}
trans = list_last_entry(&net->nft.commit_list,
struct nft_trans, list);
get_net(trans->ctx.net);
WARN_ON_ONCE(trans->put_net);
trans->put_net = true;
spin_lock(&nf_tables_destroy_list_lock);
list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list);
spin_unlock(&nf_tables_destroy_list_lock);
mutex_unlock(&net->nft.commit_mutex);
schedule_work(&trans_destroy_work);
}
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nft_trans *trans, *next;
......@@ -6495,9 +6573,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
}
nf_tables_commit_release(net);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
mutex_unlock(&net->nft.commit_mutex);
nf_tables_commit_release(net);
return 0;
}
......@@ -7168,7 +7245,8 @@ int __nft_release_basechain(struct nft_ctx *ctx)
{
struct nft_rule *rule, *nr;
BUG_ON(!nft_is_base_chain(ctx->chain));
if (WARN_ON(!nft_is_base_chain(ctx->chain)))
return 0;
nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
......@@ -7271,6 +7349,7 @@ static int __init nf_tables_module_init(void)
{
int err;
spin_lock_init(&nf_tables_destroy_list_lock);
err = register_pernet_subsys(&nf_tables_net_ops);
if (err < 0)
return err;
......@@ -7310,6 +7389,7 @@ static void __exit nf_tables_module_exit(void)
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
unregister_pernet_subsys(&nf_tables_net_ops);
cancel_work_sync(&trans_destroy_work);
rcu_barrier();
nf_tables_core_module_exit();
}
......
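The commit-side half of the deferred-release scheme above amortizes the
RCU grace period: finished transactions are spliced onto a global list
under a spinlock and a worker is kicked, so one synchronize_rcu() covers a
whole batch instead of one per commit. The idiom in isolation, with
hypothetical names:

	static LIST_HEAD(destroy_list);
	static DEFINE_SPINLOCK(destroy_lock);
	static void destroy_work_fn(struct work_struct *w);
	static DECLARE_WORK(destroy_work, destroy_work_fn);

	/* commit side: O(1) hand-off, no grace period on this path */
	static void queue_for_destruction(struct list_head *done)
	{
		spin_lock(&destroy_lock);
		list_splice_tail_init(done, &destroy_list);
		spin_unlock(&destroy_lock);

		schedule_work(&destroy_work);
	}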
......@@ -249,12 +249,24 @@ static struct nft_expr_type *nft_basic_types[] = {
&nft_exthdr_type,
};
static struct nft_object_type *nft_basic_objects[] = {
#ifdef CONFIG_NETWORK_SECMARK
&nft_secmark_obj_type,
#endif
};
int __init nf_tables_core_module_init(void)
{
int err, i;
int err, i, j = 0;
for (i = 0; i < ARRAY_SIZE(nft_basic_objects); i++) {
err = nft_register_obj(nft_basic_objects[i]);
if (err)
goto err;
}
for (i = 0; i < ARRAY_SIZE(nft_basic_types); i++) {
err = nft_register_expr(nft_basic_types[i]);
for (j = 0; j < ARRAY_SIZE(nft_basic_types); j++) {
err = nft_register_expr(nft_basic_types[j]);
if (err)
goto err;
}
......@@ -262,8 +274,12 @@ int __init nf_tables_core_module_init(void)
return 0;
err:
while (j-- > 0)
nft_unregister_expr(nft_basic_types[j]);
while (i-- > 0)
nft_unregister_expr(nft_basic_types[i]);
nft_unregister_obj(nft_basic_objects[i]);
return err;
}
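The error path above is hard to read with old and new lines interleaved;
after the patch it unwinds the two arrays independently, j covering
expression types and i covering objects:

	err:
		while (j-- > 0)
			nft_unregister_expr(nft_basic_types[j]);

		while (i-- > 0)
			nft_unregister_obj(nft_basic_objects[i]);

		return err;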
......@@ -274,4 +290,8 @@ void nf_tables_core_module_exit(void)
i = ARRAY_SIZE(nft_basic_types);
while (i-- > 0)
nft_unregister_expr(nft_basic_types[i]);
i = ARRAY_SIZE(nft_basic_objects);
while (i-- > 0)
nft_unregister_obj(nft_basic_objects[i]);
}
......@@ -53,9 +53,6 @@ ctnl_timeout_parse_policy(void *timeout,
struct nlattr **tb;
int ret = 0;
if (!l4proto->ctnl_timeout.nlattr_to_obj)
return 0;
tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
GFP_KERNEL);
......@@ -125,7 +122,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
return -EBUSY;
}
l4proto = nf_ct_l4proto_find_get(l3num, l4num);
l4proto = nf_ct_l4proto_find_get(l4num);
/* This protocol is not supported, skip. */
if (l4proto->l4proto != l4num) {
......@@ -167,6 +164,8 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto;
struct nlattr *nest_parms;
int ret;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
......@@ -186,22 +185,15 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
htonl(refcount_read(&timeout->refcnt))))
goto nla_put_failure;
if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
struct nlattr *nest_parms;
int ret;
nest_parms = nla_nest_start(skb,
CTA_TIMEOUT_DATA | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
ret = l4proto->ctnl_timeout.obj_to_nlattr(skb,
&timeout->timeout.data);
if (ret < 0)
goto nla_put_failure;
ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->timeout.data);
if (ret < 0)
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
}
nla_nest_end(skb, nest_parms);
nlmsg_end(skb, nlh);
return skb->len;
......@@ -369,7 +361,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
l4proto = nf_ct_l4proto_find_get(l3num, l4num);
l4proto = nf_ct_l4proto_find_get(l4num);
/* This protocol is not supported, skip. */
if (l4proto->l4proto != l4num) {
......@@ -391,12 +383,14 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
static int
cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u32 type, int event,
u32 seq, u32 type, int event, u16 l3num,
const struct nf_conntrack_l4proto *l4proto)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
struct nlattr *nest_parms;
int ret;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
......@@ -408,25 +402,19 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) ||
if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l3num)) ||
nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
goto nla_put_failure;
if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
struct nlattr *nest_parms;
int ret;
nest_parms = nla_nest_start(skb,
CTA_TIMEOUT_DATA | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
if (ret < 0)
goto nla_put_failure;
ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
if (ret < 0)
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
}
nla_nest_end(skb, nest_parms);
nlmsg_end(skb, nlh);
return skb->len;
......@@ -454,7 +442,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
l4proto = nf_ct_l4proto_find_get(l3num, l4num);
l4proto = nf_ct_l4proto_find_get(l4num);
/* This protocol is not supported, skip. */
if (l4proto->l4proto != l4num) {
......@@ -472,6 +460,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type),
IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
l3num,
l4proto);
if (ret <= 0) {
kfree_skb(skb2);
......
......@@ -79,7 +79,8 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
tb[NFTA_CMP_DATA]);
BUG_ON(err < 0);
if (err < 0)
return err;
priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
err = nft_validate_register_load(priv->sreg, desc.len);
......@@ -129,7 +130,8 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
err = nft_data_init(NULL, &data, sizeof(data), &desc,
tb[NFTA_CMP_DATA]);
BUG_ON(err < 0);
if (err < 0)
return err;
priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
err = nft_validate_register_load(priv->sreg, desc.len);
......
......@@ -279,7 +279,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
{
const struct nft_ct *priv = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
#ifdef CONFIG_NF_CONNTRACK_MARK
#if defined(CONFIG_NF_CONNTRACK_MARK) || defined(CONFIG_NF_CONNTRACK_SECMARK)
u32 value = regs->data[priv->sreg];
#endif
enum ip_conntrack_info ctinfo;
......@@ -298,6 +298,14 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
}
break;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK:
if (ct->secmark != value) {
ct->secmark = value;
nf_conntrack_event_cache(IPCT_SECMARK, ct);
}
break;
#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS:
nf_connlabels_replace(ct,
......@@ -564,6 +572,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
return -EINVAL;
len = sizeof(u32);
break;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK:
if (tb[NFTA_CT_DIRECTION])
return -EINVAL;
len = sizeof(u32);
break;
#endif
default:
return -EOPNOTSUPP;
......@@ -776,9 +791,6 @@ nft_ct_timeout_parse_policy(void *timeouts,
struct nlattr **tb;
int ret = 0;
if (!l4proto->ctnl_timeout.nlattr_to_obj)
return 0;
tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
GFP_KERNEL);
......@@ -858,7 +870,7 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
priv->l4proto = l4num;
l4proto = nf_ct_l4proto_find_get(l3num, l4num);
l4proto = nf_ct_l4proto_find_get(l4num);
if (l4proto->l4proto != l4num) {
ret = -EOPNOTSUPP;
......
......@@ -235,14 +235,31 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
return err;
}
static void nft_dynset_activate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_dynset *priv = nft_expr_priv(expr);
nf_tables_rebind_set(ctx, priv->set, &priv->binding);
}
static void nft_dynset_deactivate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_dynset *priv = nft_expr_priv(expr);
nf_tables_unbind_set(ctx, priv->set, &priv->binding);
}
static void nft_dynset_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_dynset *priv = nft_expr_priv(expr);
nf_tables_unbind_set(ctx, priv->set, &priv->binding);
if (priv->expr != NULL)
nft_expr_destroy(ctx, priv->expr);
nf_tables_destroy_set(ctx, priv->set);
}
static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
......@@ -279,6 +296,8 @@ static const struct nft_expr_ops nft_dynset_ops = {
.eval = nft_dynset_eval,
.init = nft_dynset_init,
.destroy = nft_dynset_destroy,
.activate = nft_dynset_activate,
.deactivate = nft_dynset_deactivate,
.dump = nft_dynset_dump,
};
......
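The activate/deactivate pair added here (and mirrored in nft_lookup and
nft_objref below) lets a deleted rule's set binding be restored if the
transaction aborts, while real teardown waits for ->destroy() at commit
time. Hypothetical wiring, to show the pairing nft_expr_check_ops()
insists on (.type and .size elided):

	static const struct nft_expr_ops example_ops = {
		.eval       = example_eval,
		.init       = example_init,
		.activate   = example_activate,   /* rebind on abort  */
		.deactivate = example_deactivate, /* unbind on delete */
		.destroy    = example_destroy,    /* free on commit   */
		.dump       = example_dump,
	};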
......@@ -121,12 +121,28 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return 0;
}
static void nft_lookup_activate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_lookup *priv = nft_expr_priv(expr);
nf_tables_rebind_set(ctx, priv->set, &priv->binding);
}
static void nft_lookup_deactivate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_lookup *priv = nft_expr_priv(expr);
nf_tables_unbind_set(ctx, priv->set, &priv->binding);
}
static void nft_lookup_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_lookup *priv = nft_expr_priv(expr);
nf_tables_unbind_set(ctx, priv->set, &priv->binding);
nf_tables_destroy_set(ctx, priv->set);
}
static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
......@@ -209,6 +225,8 @@ static const struct nft_expr_ops nft_lookup_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
.eval = nft_lookup_eval,
.init = nft_lookup_init,
.activate = nft_lookup_activate,
.deactivate = nft_lookup_deactivate,
.destroy = nft_lookup_destroy,
.dump = nft_lookup_dump,
.validate = nft_lookup_validate,
......
......@@ -284,6 +284,11 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
skb->nf_trace = !!value8;
break;
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
skb->secmark = value;
break;
#endif
default:
WARN_ON(1);
}
......@@ -436,6 +441,9 @@ static int nft_meta_set_init(const struct nft_ctx *ctx,
switch (priv->key) {
case NFT_META_MARK:
case NFT_META_PRIORITY:
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
#endif
len = sizeof(u32);
break;
case NFT_META_NFTRACE:
......@@ -543,3 +551,111 @@ struct nft_expr_type nft_meta_type __read_mostly = {
.maxattr = NFTA_META_MAX,
.owner = THIS_MODULE,
};
#ifdef CONFIG_NETWORK_SECMARK
struct nft_secmark {
u32 secid;
char *ctx;
};
static const struct nla_policy nft_secmark_policy[NFTA_SECMARK_MAX + 1] = {
[NFTA_SECMARK_CTX] = { .type = NLA_STRING, .len = NFT_SECMARK_CTX_MAXLEN },
};
static int nft_secmark_compute_secid(struct nft_secmark *priv)
{
u32 tmp_secid = 0;
int err;
err = security_secctx_to_secid(priv->ctx, strlen(priv->ctx), &tmp_secid);
if (err)
return err;
if (!tmp_secid)
return -ENOENT;
err = security_secmark_relabel_packet(tmp_secid);
if (err)
return err;
priv->secid = tmp_secid;
return 0;
}
static void nft_secmark_obj_eval(struct nft_object *obj, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_secmark *priv = nft_obj_data(obj);
struct sk_buff *skb = pkt->skb;
skb->secmark = priv->secid;
}
static int nft_secmark_obj_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_secmark *priv = nft_obj_data(obj);
int err;
if (tb[NFTA_SECMARK_CTX] == NULL)
return -EINVAL;
priv->ctx = nla_strdup(tb[NFTA_SECMARK_CTX], GFP_KERNEL);
if (!priv->ctx)
return -ENOMEM;
err = nft_secmark_compute_secid(priv);
if (err) {
kfree(priv->ctx);
return err;
}
security_secmark_refcount_inc();
return 0;
}
static int nft_secmark_obj_dump(struct sk_buff *skb, struct nft_object *obj,
bool reset)
{
struct nft_secmark *priv = nft_obj_data(obj);
int err;
if (nla_put_string(skb, NFTA_SECMARK_CTX, priv->ctx))
return -1;
if (reset) {
err = nft_secmark_compute_secid(priv);
if (err)
return err;
}
return 0;
}
static void nft_secmark_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
{
struct nft_secmark *priv = nft_obj_data(obj);
security_secmark_refcount_dec();
kfree(priv->ctx);
}
static const struct nft_object_ops nft_secmark_obj_ops = {
.type = &nft_secmark_obj_type,
.size = sizeof(struct nft_secmark),
.init = nft_secmark_obj_init,
.eval = nft_secmark_obj_eval,
.dump = nft_secmark_obj_dump,
.destroy = nft_secmark_obj_destroy,
};
struct nft_object_type nft_secmark_obj_type __read_mostly = {
.type = NFT_OBJECT_SECMARK,
.ops = &nft_secmark_obj_ops,
.maxattr = NFTA_SECMARK_MAX,
.policy = nft_secmark_policy,
.owner = THIS_MODULE,
};
#endif /* CONFIG_NETWORK_SECMARK */
......@@ -155,12 +155,28 @@ static int nft_objref_map_dump(struct sk_buff *skb, const struct nft_expr *expr)
return -1;
}
static void nft_objref_map_activate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_objref_map *priv = nft_expr_priv(expr);
nf_tables_rebind_set(ctx, priv->set, &priv->binding);
}
static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_objref_map *priv = nft_expr_priv(expr);
nf_tables_unbind_set(ctx, priv->set, &priv->binding);
}
static void nft_objref_map_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_objref_map *priv = nft_expr_priv(expr);
nf_tables_unbind_set(ctx, priv->set, &priv->binding);
nf_tables_destroy_set(ctx, priv->set);
}
static struct nft_expr_type nft_objref_type;
......@@ -169,6 +185,8 @@ static const struct nft_expr_ops nft_objref_map_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
.eval = nft_objref_map_eval,
.init = nft_objref_map_init,
.activate = nft_objref_map_activate,
.deactivate = nft_objref_map_deactivate,
.destroy = nft_objref_map_destroy,
.dump = nft_objref_map_dump,
};
......
......@@ -94,7 +94,8 @@ static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = {
int nft_reject_icmp_code(u8 code)
{
BUG_ON(code > NFT_REJECT_ICMPX_MAX);
if (WARN_ON_ONCE(code > NFT_REJECT_ICMPX_MAX))
return ICMP_NET_UNREACH;
return icmp_code_v4[code];
}
......@@ -111,7 +112,8 @@ static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = {
int nft_reject_icmpv6_code(u8 code)
{
BUG_ON(code > NFT_REJECT_ICMPX_MAX);
if (WARN_ON_ONCE(code > NFT_REJECT_ICMPX_MAX))
return ICMPV6_NOROUTE;
return icmp_code_v6[code];
}
......
......@@ -90,6 +90,11 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
case NFT_RT_TCPMSS:
nft_reg_store16(dest, get_tcpmss(pkt, dst));
break;
#ifdef CONFIG_XFRM
case NFT_RT_XFRM:
nft_reg_store8(dest, !!dst->xfrm);
break;
#endif
default:
WARN_ON(1);
goto err;
......@@ -130,6 +135,11 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
case NFT_RT_TCPMSS:
len = sizeof(u16);
break;
#ifdef CONFIG_XFRM
case NFT_RT_XFRM:
len = sizeof(u8);
break;
#endif
default:
return -EOPNOTSUPP;
}
......@@ -164,6 +174,7 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
case NFT_RT_NEXTHOP4:
case NFT_RT_NEXTHOP6:
case NFT_RT_CLASSID:
case NFT_RT_XFRM:
return 0;
case NFT_RT_TCPMSS:
hooks = (1 << NF_INET_FORWARD) |
......
......@@ -88,7 +88,7 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
.key = key,
};
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
*ext = &he->ext;
......@@ -106,7 +106,7 @@ static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
.key = elem->key.val.data,
};
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
return he;
......@@ -129,7 +129,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
.key = key,
};
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
goto out;
......@@ -217,7 +217,7 @@ static void *nft_rhash_deactivate(const struct net *net,
};
rcu_read_lock();
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL &&
!nft_rhash_flush(net, set, he))
he = NULL;
......@@ -244,21 +244,15 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_rhash_elem *he;
struct rhashtable_iter hti;
struct nft_set_elem elem;
int err;
err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC);
iter->err = err;
if (err)
return;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
err = PTR_ERR(he);
if (err != -EAGAIN) {
iter->err = err;
goto out;
if (PTR_ERR(he) != -EAGAIN) {
iter->err = PTR_ERR(he);
break;
}
continue;
......@@ -275,13 +269,11 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0)
goto out;
break;
cont:
iter->count++;
}
out:
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
}
......@@ -293,21 +285,17 @@ static void nft_rhash_gc(struct work_struct *work)
struct nft_rhash *priv;
struct nft_set_gc_batch *gcb = NULL;
struct rhashtable_iter hti;
int err;
priv = container_of(work, struct nft_rhash, gc_work.work);
set = nft_set_container_of(priv);
err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
if (err)
goto schedule;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
if (PTR_ERR(he) != -EAGAIN)
goto out;
break;
continue;
}
......@@ -326,17 +314,15 @@ static void nft_rhash_gc(struct work_struct *work)
gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
if (gcb == NULL)
goto out;
break;
rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
atomic_dec(&set->nelems);
nft_set_gc_batch_add(gcb, he);
}
out:
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
nft_set_gc_batch_complete(gcb);
schedule:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
......
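rhashtable_lookup_fast() differs from rhashtable_lookup() only in taking
the RCU read lock itself, which is redundant on these packet paths where
the netfilter hooks already hold it. Roughly, from the rhashtable API
(simplified):

	static inline void *lookup_fast_equiv(struct rhashtable *ht,
					      const void *key,
					      const struct rhashtable_params params)
	{
		void *obj;

		rcu_read_lock();
		obj = rhashtable_lookup(ht, key, params);
		rcu_read_unlock();

		return obj;
	}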
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Generic part shared by ipv4 and ipv6 backends.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <linux/in.h>
#include <net/xfrm.h>
static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = {
[NFTA_XFRM_KEY] = { .type = NLA_U32 },
[NFTA_XFRM_DIR] = { .type = NLA_U8 },
[NFTA_XFRM_SPNUM] = { .type = NLA_U32 },
[NFTA_XFRM_DREG] = { .type = NLA_U32 },
};
struct nft_xfrm {
enum nft_xfrm_keys key:8;
enum nft_registers dreg:8;
u8 dir;
u8 spnum;
};
static int nft_xfrm_get_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_xfrm *priv = nft_expr_priv(expr);
unsigned int len = 0;
u32 spnum = 0;
u8 dir;
if (!tb[NFTA_XFRM_KEY] || !tb[NFTA_XFRM_DIR] || !tb[NFTA_XFRM_DREG])
return -EINVAL;
switch (ctx->family) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
case NFPROTO_INET:
break;
default:
return -EOPNOTSUPP;
}
priv->key = ntohl(nla_get_u32(tb[NFTA_XFRM_KEY]));
switch (priv->key) {
case NFT_XFRM_KEY_REQID:
case NFT_XFRM_KEY_SPI:
len = sizeof(u32);
break;
case NFT_XFRM_KEY_DADDR_IP4:
case NFT_XFRM_KEY_SADDR_IP4:
len = sizeof(struct in_addr);
break;
case NFT_XFRM_KEY_DADDR_IP6:
case NFT_XFRM_KEY_SADDR_IP6:
len = sizeof(struct in6_addr);
break;
default:
return -EINVAL;
}
dir = nla_get_u8(tb[NFTA_XFRM_DIR]);
switch (dir) {
case XFRM_POLICY_IN:
case XFRM_POLICY_OUT:
priv->dir = dir;
break;
default:
return -EINVAL;
}
if (tb[NFTA_XFRM_SPNUM])
spnum = ntohl(nla_get_be32(tb[NFTA_XFRM_SPNUM]));
if (spnum >= XFRM_MAX_DEPTH)
return -ERANGE;
priv->spnum = spnum;
priv->dreg = nft_parse_register(tb[NFTA_XFRM_DREG]);
return nft_validate_register_store(ctx, priv->dreg, NULL,
NFT_DATA_VALUE, len);
}
/* Return true if key asks for daddr/saddr and current
* state does have a valid address (BEET, TUNNEL).
*/
static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode)
{
switch (k) {
case NFT_XFRM_KEY_DADDR_IP4:
case NFT_XFRM_KEY_SADDR_IP4:
if (family == NFPROTO_IPV4)
break;
return false;
case NFT_XFRM_KEY_DADDR_IP6:
case NFT_XFRM_KEY_SADDR_IP6:
if (family == NFPROTO_IPV6)
break;
return false;
default:
return true;
}
return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL;
}
static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
struct nft_regs *regs,
const struct xfrm_state *state,
u8 family)
{
u32 *dest = &regs->data[priv->dreg];
if (!xfrm_state_addr_ok(priv->key, family, state->props.mode)) {
regs->verdict.code = NFT_BREAK;
return;
}
switch (priv->key) {
case NFT_XFRM_KEY_UNSPEC:
case __NFT_XFRM_KEY_MAX:
WARN_ON_ONCE(1);
break;
case NFT_XFRM_KEY_DADDR_IP4:
*dest = state->id.daddr.a4;
return;
case NFT_XFRM_KEY_DADDR_IP6:
memcpy(dest, &state->id.daddr.in6, sizeof(struct in6_addr));
return;
case NFT_XFRM_KEY_SADDR_IP4:
*dest = state->props.saddr.a4;
return;
case NFT_XFRM_KEY_SADDR_IP6:
memcpy(dest, &state->props.saddr.in6, sizeof(struct in6_addr));
return;
case NFT_XFRM_KEY_REQID:
*dest = state->props.reqid;
return;
case NFT_XFRM_KEY_SPI:
*dest = state->id.spi;
return;
}
regs->verdict.code = NFT_BREAK;
}
static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct sec_path *sp = pkt->skb->sp;
const struct xfrm_state *state;
if (sp == NULL || sp->len <= priv->spnum) {
regs->verdict.code = NFT_BREAK;
return;
}
state = sp->xvec[priv->spnum];
nft_xfrm_state_get_key(priv, regs, state, nft_pf(pkt));
}
static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct dst_entry *dst = skb_dst(pkt->skb);
int i;
for (i = 0; dst && dst->xfrm;
dst = ((const struct xfrm_dst *)dst)->child, i++) {
if (i < priv->spnum)
continue;
nft_xfrm_state_get_key(priv, regs, dst->xfrm, nft_pf(pkt));
return;
}
regs->verdict.code = NFT_BREAK;
}
static void nft_xfrm_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_xfrm *priv = nft_expr_priv(expr);
switch (priv->dir) {
case XFRM_POLICY_IN:
nft_xfrm_get_eval_in(priv, regs, pkt);
break;
case XFRM_POLICY_OUT:
nft_xfrm_get_eval_out(priv, regs, pkt);
break;
default:
WARN_ON_ONCE(1);
regs->verdict.code = NFT_BREAK;
break;
}
}
static int nft_xfrm_get_dump(struct sk_buff *skb,
const struct nft_expr *expr)
{
const struct nft_xfrm *priv = nft_expr_priv(expr);
if (nft_dump_register(skb, NFTA_XFRM_DREG, priv->dreg))
return -1;
if (nla_put_be32(skb, NFTA_XFRM_KEY, htonl(priv->key)))
return -1;
if (nla_put_u8(skb, NFTA_XFRM_DIR, priv->dir))
return -1;
if (nla_put_be32(skb, NFTA_XFRM_SPNUM, htonl(priv->spnum)))
return -1;
return 0;
}
static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nft_data **data)
{
const struct nft_xfrm *priv = nft_expr_priv(expr);
unsigned int hooks;
switch (priv->dir) {
case XFRM_POLICY_IN:
hooks = (1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_PRE_ROUTING);
break;
case XFRM_POLICY_OUT:
hooks = (1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING);
break;
default:
WARN_ON_ONCE(1);
return -EINVAL;
}
return nft_chain_validate_hooks(ctx->chain, hooks);
}
static struct nft_expr_type nft_xfrm_type;
static const struct nft_expr_ops nft_xfrm_get_ops = {
.type = &nft_xfrm_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_xfrm)),
.eval = nft_xfrm_get_eval,
.init = nft_xfrm_get_init,
.dump = nft_xfrm_get_dump,
.validate = nft_xfrm_validate,
};
static struct nft_expr_type nft_xfrm_type __read_mostly = {
.name = "xfrm",
.ops = &nft_xfrm_get_ops,
.policy = nft_xfrm_policy,
.maxattr = NFTA_XFRM_MAX,
.owner = THIS_MODULE,
};
static int __init nft_xfrm_module_init(void)
{
return nft_register_expr(&nft_xfrm_type);
}
static void __exit nft_xfrm_module_exit(void)
{
nft_unregister_expr(&nft_xfrm_type);
}
module_init(nft_xfrm_module_init);
module_exit(nft_xfrm_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("nf_tables: xfrm/IPSec matching");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
MODULE_AUTHOR("Máté Eckl <ecklm94@gmail.com>");
MODULE_ALIAS_NFT_EXPR("xfrm");
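The two eval paths differ only in where the requested state lives:
inbound, spnum indexes the already-decapsulated sec_path; outbound, it
counts transforms along the not-yet-applied dst chain. An illustrative
merge of the two (xfrm_state_at() is a hypothetical helper, not part of
this module):

	static const struct xfrm_state *xfrm_state_at(const struct sk_buff *skb,
						      u8 dir, u8 spnum)
	{
		if (dir == XFRM_POLICY_IN) {
			const struct sec_path *sp = skb->sp;

			return (sp && spnum < sp->len) ? sp->xvec[spnum] : NULL;
		} else {
			const struct dst_entry *dst = skb_dst(skb);
			u8 i = 0;

			for (; dst && dst->xfrm;
			     dst = ((const struct xfrm_dst *)dst)->child)
				if (i++ == spnum)
					return dst->xfrm;

			return NULL;
		}
	}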
......@@ -159,7 +159,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
/* Make sure the timeout policy matches any existing protocol tracker,
* otherwise default to generic.
*/
l4proto = __nf_ct_l4proto_find(par->family, proto);
l4proto = __nf_ct_l4proto_find(proto);
if (timeout->l4proto->l4proto != l4proto->l4proto) {
ret = -EINVAL;
pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
......
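
With the layer 3 family dropped from the lookup (items 18 and 19 of the series), the tracker table can be a flat array indexed by the layer 4 protocol number alone. A hedged sketch of the assumed shape, with the array name invented here, falling back to the generic tracker on out-of-range numbers:

	static struct nf_conntrack_l4proto __rcu *l4protos[MAX_NF_CT_PROTO + 1];

	const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto)
	{
		if (unlikely(l4proto >= ARRAY_SIZE(l4protos)))
			return &nf_conntrack_l4proto_generic;

		return rcu_dereference(l4protos[l4proto]);
	}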
......@@ -68,8 +68,6 @@ struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
{
struct idletimer_tg *entry;
BUG_ON(!label);
list_for_each_entry(entry, &idletimer_tg_list, entry) {
if (!strcmp(label, entry->attr.attr.name))
return entry;
......@@ -172,8 +170,6 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb,
pr_debug("resetting timer %s, timeout period %u\n",
info->label, info->timeout);
BUG_ON(!info->timer);
mod_timer(&info->timer->timer,
msecs_to_jiffies(info->timeout * 1000) + jiffies);
......
......@@ -35,8 +35,6 @@ secmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
u32 secmark = 0;
const struct xt_secmark_target_info *info = par->targinfo;
BUG_ON(info->mode != mode);
switch (mode) {
case SECMARK_MODE_SEL:
secmark = info->secid;
......
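
The three hunks above only delete BUG_ON() assertions on conditions the callers already guarantee, in line with the series' move away from crashing on recoverable state. Where a check is still wanted, the gentler pattern looks like this sketch:

	if (WARN_ON(!label))
		return NULL;	/* log a warning and fail gracefully */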
......@@ -68,6 +68,38 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
return 0;
}

static int cgroup_mt_check_v2(const struct xt_mtchk_param *par)
{
struct xt_cgroup_info_v2 *info = par->matchinfo;
struct cgroup *cgrp;
if ((info->invert_path & ~1) || (info->invert_classid & ~1))
return -EINVAL;
if (!info->has_path && !info->has_classid) {
pr_info("xt_cgroup: no path or classid specified\n");
return -EINVAL;
}
if (info->has_path && info->has_classid) {
pr_info_ratelimited("path and classid specified\n");
return -EINVAL;
}
info->priv = NULL;
if (info->has_path) {
cgrp = cgroup_get_from_path(info->path);
if (IS_ERR(cgrp)) {
pr_info_ratelimited("invalid path, errno=%ld\n",
PTR_ERR(cgrp));
return -EINVAL;
}
info->priv = cgrp;
}
return 0;
}
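
The whole point of revision 2 is a smaller userspace path buffer (revision 1 embeds a PATH_MAX-sized array in every rule). The struct lives in the uapi header, outside this excerpt; a sketch inferred from the fields used here, where the 512-byte bound and the union are assumptions:

	#define XT_CGROUP_PATH_MAX	512	/* v1 used PATH_MAX */

	struct xt_cgroup_info_v2 {
		__u8	has_path;
		__u8	has_classid;
		__u8	invert_path;
		__u8	invert_classid;
		union {
			char	path[XT_CGROUP_PATH_MAX];
			__u32	classid;
		};
		/* kernel-only pointer, hidden from userspace via .usersize */
		void	*priv __attribute__((aligned(8)));
	};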

static bool
cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
......@@ -99,6 +131,24 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
info->invert_classid;
}

static bool cgroup_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_cgroup_info_v2 *info = par->matchinfo;
struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
struct cgroup *ancestor = info->priv;
struct sock *sk = skb->sk;
if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
return false;
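
	/* A path match is ancestral: sockets in any sub-cgroup of the
	 * configured one also match.  A classid match is exact. */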
if (ancestor)
return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
info->invert_path;
else
return (info->classid == sock_cgroup_classid(skcd)) ^
info->invert_classid;
}

static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
{
struct xt_cgroup_info_v1 *info = par->matchinfo;
......@@ -107,6 +157,14 @@ static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
cgroup_put(info->priv);
}

static void cgroup_mt_destroy_v2(const struct xt_mtdtor_param *par)
{
struct xt_cgroup_info_v2 *info = par->matchinfo;
if (info->priv)
cgroup_put(info->priv);
}

static struct xt_match cgroup_mt_reg[] __read_mostly = {
{
.name = "cgroup",
......@@ -134,6 +192,20 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = {
(1 << NF_INET_POST_ROUTING) |
(1 << NF_INET_LOCAL_IN),
},
{
.name = "cgroup",
.revision = 2,
.family = NFPROTO_UNSPEC,
.checkentry = cgroup_mt_check_v2,
.match = cgroup_mt_v2,
.matchsize = sizeof(struct xt_cgroup_info_v2),
.usersize = offsetof(struct xt_cgroup_info_v2, priv),
.destroy = cgroup_mt_destroy_v2,
.me = THIS_MODULE,
.hooks = (1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING) |
(1 << NF_INET_LOCAL_IN),
},
};

static int __init cgroup_mt_init(void)
......
......@@ -11,11 +11,6 @@
#include <linux/netfilter/xt_quota.h>
#include <linux/module.h>

struct xt_quota_priv {
spinlock_t lock;
uint64_t quota;
};

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
MODULE_DESCRIPTION("Xtables: countdown quota match");
......@@ -26,54 +21,48 @@ static bool
quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
struct xt_quota_info *q = (void *)par->matchinfo;
struct xt_quota_priv *priv = q->master;
u64 current_count = atomic64_read(&q->counter);
bool ret = q->flags & XT_QUOTA_INVERT;
spin_lock_bh(&priv->lock);
if (priv->quota >= skb->len) {
priv->quota -= skb->len;
ret = !ret;
} else {
/* we do not allow even small packets from now on */
priv->quota = 0;
}
spin_unlock_bh(&priv->lock);
return ret;
u64 old_count, new_count;
do {
if (current_count == 1)
return ret;
if (current_count <= skb->len) {
atomic64_set(&q->counter, 1);
return ret;
}
old_count = current_count;
new_count = current_count - skb->len;
current_count = atomic64_cmpxchg(&q->counter, old_count,
new_count);
} while (current_count != old_count);
return !ret;
}

static int quota_mt_check(const struct xt_mtchk_param *par)
{
struct xt_quota_info *q = par->matchinfo;
BUILD_BUG_ON(sizeof(atomic64_t) != sizeof(__u64));
if (q->flags & ~XT_QUOTA_MASK)
return -EINVAL;
if (atomic64_read(&q->counter) > q->quota + 1)
return -ERANGE;
q->master = kmalloc(sizeof(*q->master), GFP_KERNEL);
if (q->master == NULL)
return -ENOMEM;
spin_lock_init(&q->master->lock);
q->master->quota = q->quota;
if (atomic64_read(&q->counter) == 0)
atomic64_set(&q->counter, q->quota + 1);
return 0;
}

static void quota_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_quota_info *q = par->matchinfo;
kfree(q->master);
}

static struct xt_match quota_mt_reg __read_mostly = {
.name = "quota",
.revision = 0,
.family = NFPROTO_UNSPEC,
.match = quota_mt,
.checkentry = quota_mt_check,
.destroy = quota_mt_destroy,
.matchsize = sizeof(struct xt_quota_info),
.usersize = offsetof(struct xt_quota_info, master),
.me = THIS_MODULE,
};
......
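
The rewritten quota_mt() drops the per-rule spinlock and kmalloc'd mirror state in favour of a lock-free cmpxchg loop over an atomic64. The counter holds remaining bytes plus one: the value 1 is the "exhausted" sentinel, leaving 0 free to mean "not yet initialised" in quota_mt_check(). A standalone userspace sketch of the same loop, using C11 atomics in place of the kernel's atomic64_cmpxchg (all names invented here):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static _Atomic uint64_t counter;	/* remaining bytes + 1 */

	static bool quota_consume(uint64_t pkt_len)
	{
		uint64_t cur = atomic_load(&counter);

		for (;;) {
			if (cur == 1)			/* sentinel: exhausted */
				return false;
			if (cur <= pkt_len) {		/* this packet drains it */
				atomic_store(&counter, 1);
				return false;
			}
			/* on failure, cur is reloaded with the fresh value */
			if (atomic_compare_exchange_weak(&counter, &cur,
							 cur - pkt_len))
				return true;
		}
	}

	int main(void)
	{
		atomic_store(&counter, 1000 + 1);	/* 1000-byte quota */

		printf("%d\n", quota_consume(600));	/* 1: fits */
		printf("%d\n", quota_consume(600));	/* 0: drains the rest */
		printf("%d\n", quota_consume(1));	/* 0: sentinel hit */
		return 0;
	}

The sentinel store is deliberately outside the cmpxchg: two CPUs racing on the final bytes may both take the draining branch, which only makes the last packet's accounting approximate, the same trade-off the kernel version makes.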
......@@ -933,6 +933,11 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
struct nf_conn *ct;

	if (!cached) {
struct nf_hook_state state = {
.hook = NF_INET_PRE_ROUTING,
.pf = info->family,
.net = net,
};
struct nf_conn *tmpl = info->ct;
int err;
......@@ -944,8 +949,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
nf_ct_set(skb, tmpl, IP_CT_NEW);
}
err = nf_conntrack_in(net, info->family,
NF_INET_PRE_ROUTING, skb);
err = nf_conntrack_in(skb, &state);
if (err != NF_ACCEPT)
return -ENOENT;
......
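
The openvswitch change follows item 14 of the series: nf_conntrack_in() now receives its hook context bundled in a struct nf_hook_state instead of separate net/family/hook arguments. A caller outside the regular hook path, like ovs here, synthesizes the state it would otherwise have been handed by the netfilter core; a minimal helper sketch (name invented):

	static int synth_conntrack_in(struct net *net, u8 family,
				      struct sk_buff *skb)
	{
		struct nf_hook_state state = {
			.hook = NF_INET_PRE_ROUTING,
			.pf   = family,
			.net  = net,
		};

		return nf_conntrack_in(skb, &state);
	}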