Commit 77bbcb60 authored by Jakub Kicinski

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next. This
includes one patch to update ovs and act_ct to use nf_ct_put() instead
of nf_conntrack_put().

1) Add netns_tracker to nfnetlink_log and masquerade, from Eric Dumazet.

2) Remove redundant rcu read-side lock in nf_tables packet path.

3) Replace BUG() by WARN_ON_ONCE() in nft_payload.

4) Consolidate rule verdict tracing.

5) Replace WARN_ON() by WARN_ON_ONCE() in nf_tables core.

6) Make counter support built into nf_tables.

7) Add new field to conntrack object to identify locally generated
   traffic, from Florian Westphal.

8) Prevent NAT from shadowing well-known ports, from Florian Westphal.

9) Merge nf_flow_table_{ipv4,ipv6} into nf_flow_table_inet, also from
   Florian.

10) Remove redundant pointer in nft_pipapo AVX2 support, from Colin Ian King.

11) Replace open-coded max() in conntrack, from Jiapeng Chong.

12) Update conntrack to use refcount_t API, from Florian Westphal.

13) Move ip_ct_attach indirection into the nf_ct_hook structure.

14) Constify several pointer objects in the netfilter codebase,
    from Florian Westphal.

15) Tree-wide replacement of nf_conntrack_put() by nf_ct_put(), also
    from Florian.

16) Fix egress splat due to incorrect rcu notation, from Florian.

17) Move stateful fields of connlimit, last, quota, numgen and limit
    out of the expression data area.

18) Build a blob to represent the ruleset in nf_tables, this is a
    requirement of the new register tracking infrastructure.

19) Add NFT_REG32_NUM to define the maximum number of 32-bit registers.

20) Add register tracking infrastructure to skip redundant
    store-to-register operations; this includes support for payload,
    meta and bitwise expressions.
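Item 20 above is the conceptual core of the series. Here is a minimal,
self-contained userspace sketch of the idea (the names and struct layout
are illustrative, not the kernel's): if the expression about to populate
a register is identical to the one that last populated it, the store can
be elided.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define REG32_NUM 20

/* Toy stand-in for an nft expression: what it loads and where from. */
struct expr {
	int base;	/* e.g. network header       */
	int offset;	/* e.g. 12 for the IPv4 saddr */
	int len;
};

struct regs_track {
	const struct expr *selector[REG32_NUM]; /* last writer per register */
};

/* Returns true when the store into dreg is redundant and may be elided. */
static bool reduce(struct regs_track *t, const struct expr *e, int dreg)
{
	const struct expr *prev = t->selector[dreg];

	if (prev && memcmp(prev, e, sizeof(*e)) == 0)
		return true;		/* same data already in the register */

	t->selector[dreg] = e;		/* track the new producer */
	return false;
}

int main(void)
{
	struct regs_track t = { { NULL } };
	struct expr load_saddr = { 1, 12, 4 };

	printf("%d\n", reduce(&t, &load_saddr, 1));	/* 0: keep the store */
	printf("%d\n", reduce(&t, &load_saddr, 1));	/* 1: skip duplicate */
	return 0;
}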

* git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next: (32 commits)
  netfilter: nft_meta: cancel register tracking after meta update
  netfilter: nft_payload: cancel register tracking after payload update
  netfilter: nft_bitwise: track register operations
  netfilter: nft_meta: track register operations
  netfilter: nft_payload: track register operations
  netfilter: nf_tables: add register tracking infrastructure
  netfilter: nf_tables: add NFT_REG32_NUM
  netfilter: nf_tables: add rule blob layout
  netfilter: nft_limit: move stateful fields out of expression data
  netfilter: nft_limit: rename stateful structure
  netfilter: nft_numgen: move stateful fields out of expression data
  netfilter: nft_quota: move stateful fields out of expression data
  netfilter: nft_last: move stateful fields out of expression data
  netfilter: nft_connlimit: move stateful fields out of expression data
  netfilter: egress: avoid a lockdep splat
  net: prefer nf_ct_put instead of nf_conntrack_put
  netfilter: conntrack: avoid useless indirection during conntrack destruction
  netfilter: make function op structures const
  netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook
  netfilter: conntrack: convert to refcount_t api
  ...
====================

Link: https://lore.kernel.org/r/20220109231640.104123-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 9f3248c9 4a80e026
......@@ -381,13 +381,13 @@ struct nf_nat_hook {
enum ip_conntrack_dir dir);
};
extern struct nf_nat_hook __rcu *nf_nat_hook;
extern const struct nf_nat_hook __rcu *nf_nat_hook;
static inline void
nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
{
#if IS_ENABLED(CONFIG_NF_NAT)
struct nf_nat_hook *nat_hook;
const struct nf_nat_hook *nat_hook;
rcu_read_lock();
nat_hook = rcu_dereference(nf_nat_hook);
......@@ -440,7 +440,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <linux/netfilter/nf_conntrack_zones_common.h>
extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
struct nf_conntrack_tuple;
bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
......@@ -463,8 +462,9 @@ struct nf_ct_hook {
void (*destroy)(struct nf_conntrack *);
bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
const struct sk_buff *);
void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
};
extern struct nf_ct_hook __rcu *nf_ct_hook;
extern const struct nf_ct_hook __rcu *nf_ct_hook;
struct nlattr;
......@@ -479,7 +479,7 @@ struct nfnl_ct_hook {
void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, s32 off);
};
extern struct nfnl_ct_hook __rcu *nfnl_ct_hook;
extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook;
/**
* nf_skb_duplicated - TEE target has sent a packet
......
......@@ -2,7 +2,7 @@
#ifndef _NF_CONNTRACK_COMMON_H
#define _NF_CONNTRACK_COMMON_H
#include <linux/atomic.h>
#include <linux/refcount.h>
#include <uapi/linux/netfilter/nf_conntrack_common.h>
struct ip_conntrack_stat {
......@@ -25,19 +25,21 @@ struct ip_conntrack_stat {
#define NFCT_PTRMASK ~(NFCT_INFOMASK)
struct nf_conntrack {
atomic_t use;
refcount_t use;
};
void nf_conntrack_destroy(struct nf_conntrack *nfct);
/* like nf_ct_put, but without module dependency on nf_conntrack */
static inline void nf_conntrack_put(struct nf_conntrack *nfct)
{
if (nfct && atomic_dec_and_test(&nfct->use))
if (nfct && refcount_dec_and_test(&nfct->use))
nf_conntrack_destroy(nfct);
}
static inline void nf_conntrack_get(struct nf_conntrack *nfct)
{
if (nfct)
atomic_inc(&nfct->use);
refcount_inc(&nfct->use);
}
#endif /* _NF_CONNTRACK_COMMON_H */
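For context on the atomic_t to refcount_t switch above, a kernel-style
sketch of why conntrack lookups pair refcount_inc_not_zero() with a key
re-check under SLAB_TYPESAFE_BY_RCU. The struct and helpers here are
hypothetical; only the refcount_* calls are the real API.

#include <linux/refcount.h>
#include <linux/types.h>

struct obj {				/* hypothetical cache entry */
	refcount_t use;
	u32 key;
};

void obj_destroy(struct obj *o);	/* hypothetical destructor */

/* Under SLAB_TYPESAFE_BY_RCU a slot may be recycled at any moment, so
 * first pin the object, then re-check that it is still the entry we
 * were looking for.  refcount_inc_not_zero(), unlike atomic_inc(),
 * also refuses to revive an object whose count already hit zero.
 */
static struct obj *obj_get_rcu(struct obj *o, u32 key)
{
	if (!refcount_inc_not_zero(&o->use))
		return NULL;			/* dying, leave it alone */

	if (o->key != key) {			/* slot was recycled */
		if (refcount_dec_and_test(&o->use))
			obj_destroy(o);
		return NULL;
	}
	return o;
}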
......@@ -94,7 +94,7 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
return skb;
#endif
e = rcu_dereference(dev->nf_hooks_egress);
e = rcu_dereference_check(dev->nf_hooks_egress, rcu_read_lock_bh_held());
if (!e)
return skb;
......
......@@ -76,6 +76,8 @@ struct nf_conn {
* Hint, SKB address this struct and refcnt via skb->_nfct and
* helpers nf_conntrack_get() and nf_conntrack_put().
* Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt,
* except that the latter uses internal indirection and does not
* result in a conntrack module dependency.
* beware, nf_ct_get() is different and does not inc the refcnt.
*/
struct nf_conntrack ct_general;
......@@ -95,6 +97,7 @@ struct nf_conn {
unsigned long status;
u16 cpu;
u16 local_origin:1;
possible_net_t ct_net;
#if IS_ENABLED(CONFIG_NF_NAT)
......@@ -169,11 +172,13 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
return (struct nf_conn *)(nfct & NFCT_PTRMASK);
}
void nf_ct_destroy(struct nf_conntrack *nfct);
/* decrement reference count on a conntrack */
static inline void nf_ct_put(struct nf_conn *ct)
{
WARN_ON(!ct);
nf_conntrack_put(&ct->ct_general);
if (ct && refcount_dec_and_test(&ct->ct_general.use))
nf_ct_destroy(&ct->ct_general);
}
/* Protocol module loading */
......@@ -278,7 +283,7 @@ static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
{
s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
return timeout > 0 ? timeout : 0;
return max(timeout, 0);
}
static inline bool nf_ct_is_expired(const struct nf_conn *ct)
......
......@@ -105,6 +105,8 @@ struct nft_data {
};
} __attribute__((aligned(__alignof__(u64))));
#define NFT_REG32_NUM 20
/**
* struct nft_regs - nf_tables register set
*
......@@ -115,11 +117,21 @@ struct nft_data {
*/
struct nft_regs {
union {
u32 data[20];
u32 data[NFT_REG32_NUM];
struct nft_verdict verdict;
};
};
struct nft_regs_track {
struct {
const struct nft_expr *selector;
const struct nft_expr *bitwise;
} regs[NFT_REG32_NUM];
const struct nft_expr *cur;
const struct nft_expr *last;
};
/* Store/load an u8, u16 or u64 integer to/from the u32 data register.
*
* Note, when using concatenations, register allocation happens at 32-bit
......@@ -346,6 +358,8 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src);
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
const struct nft_expr *expr);
bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
const struct nft_expr *expr);
struct nft_set_ext;
......@@ -884,6 +898,8 @@ struct nft_expr_ops {
int (*validate)(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data);
bool (*reduce)(struct nft_regs_track *track,
const struct nft_expr *expr);
bool (*gc)(struct net *net,
const struct nft_expr *expr);
int (*offload)(struct nft_offload_ctx *ctx,
......@@ -974,6 +990,20 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
#define NFT_CHAIN_POLICY_UNSET U8_MAX
struct nft_rule_dp {
u64 is_last:1,
dlen:12,
handle:42; /* for tracing */
unsigned char data[]
__attribute__((aligned(__alignof__(struct nft_expr))));
};
struct nft_rule_blob {
unsigned long size;
unsigned char data[]
__attribute__((aligned(__alignof__(struct nft_rule_dp))));
};
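A quick sanity check on the nft_rule_dp bitfield budget defined above:
1 + 12 + 42 = 55 bits fit the u64 carrier, and dlen:12 caps a rule's
inlined expression data at 4095 bytes, which is why the commit-phase
code later guards with WARN_ON_ONCE(size >= 1 << 12). A compile-time
restatement of that assumption (mine, not from the tree):

/* is_last:1 + dlen:12 + handle:42 must fit in one u64. */
_Static_assert(1 + 12 + 42 <= 64, "nft_rule_dp bitfields exceed u64");
/* dlen:12 => per-rule expression data is bounded by 4095 bytes. */
_Static_assert((1 << 12) - 1 == 4095, "dlen bound");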
/**
* struct nft_chain - nf_tables chain
*
......@@ -987,8 +1017,8 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
* @name: name of the chain
*/
struct nft_chain {
struct nft_rule *__rcu *rules_gen_0;
struct nft_rule *__rcu *rules_gen_1;
struct nft_rule_blob __rcu *blob_gen_0;
struct nft_rule_blob __rcu *blob_gen_1;
struct list_head rules;
struct list_head list;
struct rhlist_head rhlhead;
......@@ -1003,7 +1033,7 @@ struct nft_chain {
u8 *udata;
/* Only used during control plane commit phase: */
struct nft_rule **rules_next;
struct nft_rule_blob *blob_next;
};
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
......@@ -1321,7 +1351,7 @@ struct nft_traceinfo {
const struct nft_pktinfo *pkt;
const struct nft_base_chain *basechain;
const struct nft_chain *chain;
const struct nft_rule *rule;
const struct nft_rule_dp *rule;
const struct nft_verdict *verdict;
enum nft_trace_types type;
bool packet_dumped;
......
......@@ -7,6 +7,7 @@
extern struct nft_expr_type nft_imm_type;
extern struct nft_expr_type nft_cmp_type;
extern struct nft_expr_type nft_counter_type;
extern struct nft_expr_type nft_lookup_type;
extern struct nft_expr_type nft_bitwise_type;
extern struct nft_expr_type nft_byteorder_type;
......@@ -21,6 +22,7 @@ extern struct nft_expr_type nft_last_type;
#ifdef CONFIG_NETWORK_SECMARK
extern struct nft_object_type nft_secmark_obj_type;
#endif
extern struct nft_object_type nft_counter_obj_type;
int nf_tables_core_module_init(void);
void nf_tables_core_module_exit(void);
......@@ -120,6 +122,8 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
void nft_counter_init_seqcount(void);
struct nft_expr;
struct nft_regs;
struct nft_pktinfo;
......@@ -143,4 +147,6 @@ void nft_dynset_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt);
void nft_rt_get_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt);
void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt);
#endif /* _NET_NF_TABLES_CORE_H */
......@@ -100,6 +100,25 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = {
.dump = nft_meta_get_dump,
};
static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
int i;
for (i = 0; i < NFT_REG32_NUM; i++) {
if (!track->regs[i].selector)
continue;
if (track->regs[i].selector->ops != &nft_meta_bridge_get_ops)
continue;
track->regs[i].selector = NULL;
track->regs[i].bitwise = NULL;
}
return false;
}
static const struct nft_expr_ops nft_meta_bridge_set_ops = {
.type = &nft_meta_bridge_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
......@@ -107,6 +126,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = {
.init = nft_meta_set_init,
.destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
.reduce = nft_meta_bridge_set_reduce,
.validate = nft_meta_set_validate,
};
......
......@@ -59,12 +59,8 @@ config NF_TABLES_ARP
endif # NF_TABLES
config NF_FLOW_TABLE_IPV4
tristate "Netfilter flow table IPv4 module"
depends on NF_FLOW_TABLE
help
This option adds the flow table IPv4 support.
To compile it as a module, choose M here.
tristate
select NF_FLOW_TABLE_INET
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
......
......@@ -24,9 +24,6 @@ obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
# flow table support
obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
......
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
static struct nf_flowtable_type flowtable_ipv4 = {
.family = NFPROTO_IPV4,
.init = nf_flow_table_init,
.setup = nf_flow_table_offload_setup,
.action = nf_flow_rule_route_ipv4,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ip_hook,
.owner = THIS_MODULE,
};
static int __init nf_flow_ipv4_module_init(void)
{
nft_register_flowtable_type(&flowtable_ipv4);
return 0;
}
static void __exit nf_flow_ipv4_module_exit(void)
{
nft_unregister_flowtable_type(&flowtable_ipv4);
}
module_init(nf_flow_ipv4_module_init);
module_exit(nf_flow_ipv4_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
MODULE_DESCRIPTION("Netfilter flow table support");
......@@ -48,12 +48,8 @@ endif # NF_TABLES_IPV6
endif # NF_TABLES
config NF_FLOW_TABLE_IPV6
tristate "Netfilter flow table IPv6 module"
depends on NF_FLOW_TABLE
help
This option adds the flow table IPv6 support.
To compile it as a module, choose M here.
tristate
select NF_FLOW_TABLE_INET
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
......
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
static struct nf_flowtable_type flowtable_ipv6 = {
.family = NFPROTO_IPV6,
.init = nf_flow_table_init,
.setup = nf_flow_table_offload_setup,
.action = nf_flow_rule_route_ipv6,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ipv6_hook,
.owner = THIS_MODULE,
};
static int __init nf_flow_ipv6_module_init(void)
{
nft_register_flowtable_type(&flowtable_ipv6);
return 0;
}
static void __exit nf_flow_ipv6_module_exit(void)
{
nft_unregister_flowtable_type(&flowtable_ipv6);
}
module_init(nf_flow_ipv6_module_init);
module_exit(nf_flow_ipv6_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
MODULE_DESCRIPTION("Netfilter flow table IPv6 module");
......@@ -515,12 +515,6 @@ config NFT_FLOW_OFFLOAD
This option adds the "flow_offload" expression that you can use to
choose what flows are placed into the hardware.
config NFT_COUNTER
tristate "Netfilter nf_tables counter module"
help
This option adds the "counter" expression that you can use to
include packet and byte counters in a rule.
config NFT_CONNLIMIT
tristate "Netfilter nf_tables connlimit module"
depends on NF_CONNTRACK
......
......@@ -75,7 +75,7 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o nft_last.o \
nft_chain_route.o nf_tables_offload.o \
nft_counter.o nft_chain_route.o nf_tables_offload.o \
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
nft_set_pipapo.o
......@@ -100,7 +100,6 @@ obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
obj-$(CONFIG_NFT_REJECT_NETDEV) += nft_reject_netdev.o
obj-$(CONFIG_NFT_TUNNEL) += nft_tunnel.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
obj-$(CONFIG_NFT_REDIR) += nft_redir.o
......
......@@ -666,32 +666,29 @@ EXPORT_SYMBOL(nf_hook_slow_list);
/* This needs to be compiled in any case to avoid dependencies between the
* nfnetlink_queue code and nf_conntrack.
*/
struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
const struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfnl_ct_hook);
struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_hook);
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
__rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);
struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
const struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_hook);
/* This does not belong here, but locally generated errors need it if connection
* tracking in use: without this, connection may not be in hash table, and hence
* manufactured ICMP or RST packets will not be associated with it.
*/
void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{
void (*attach)(struct sk_buff *, const struct sk_buff *);
const struct nf_ct_hook *ct_hook;
if (skb->_nfct) {
rcu_read_lock();
attach = rcu_dereference(ip_ct_attach);
if (attach)
attach(new, skb);
ct_hook = rcu_dereference(nf_ct_hook);
if (ct_hook)
ct_hook->attach(new, skb);
rcu_read_unlock();
}
}
......@@ -699,7 +696,7 @@ EXPORT_SYMBOL(nf_ct_attach);
void nf_conntrack_destroy(struct nf_conntrack *nfct)
{
struct nf_ct_hook *ct_hook;
const struct nf_ct_hook *ct_hook;
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
......@@ -712,7 +709,7 @@ EXPORT_SYMBOL(nf_conntrack_destroy);
bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
const struct sk_buff *skb)
{
struct nf_ct_hook *ct_hook;
const struct nf_ct_hook *ct_hook;
bool ret = false;
rcu_read_lock();
......
......@@ -559,7 +559,7 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
#define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)
/* Released via destroy_conntrack() */
/* Released via nf_ct_destroy() */
struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
gfp_t flags)
......@@ -586,7 +586,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
tmpl->status = IPS_TEMPLATE;
write_pnet(&tmpl->ct_net, net);
nf_ct_zone_add(tmpl, zone);
atomic_set(&tmpl->ct_general.use, 0);
refcount_set(&tmpl->ct_general.use, 1);
return tmpl;
}
......@@ -613,13 +613,12 @@ static void destroy_gre_conntrack(struct nf_conn *ct)
#endif
}
static void
destroy_conntrack(struct nf_conntrack *nfct)
void nf_ct_destroy(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
pr_debug("destroy_conntrack(%p)\n", ct);
WARN_ON(atomic_read(&nfct->use) != 0);
pr_debug("%s(%p)\n", __func__, ct);
WARN_ON(refcount_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
......@@ -644,9 +643,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
if (ct->master)
nf_ct_put(ct->master);
pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
pr_debug("%s: returning ct=%p to slab\n", __func__, ct);
nf_conntrack_free(ct);
}
EXPORT_SYMBOL(nf_ct_destroy);
static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
......@@ -743,7 +743,7 @@ nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
/* caller must hold rcu readlock and none of the nf_conntrack_locks */
static void nf_ct_gc_expired(struct nf_conn *ct)
{
if (!atomic_inc_not_zero(&ct->ct_general.use))
if (!refcount_inc_not_zero(&ct->ct_general.use))
return;
if (nf_ct_should_gc(ct))
......@@ -811,7 +811,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
* in, try to obtain a reference and re-check tuple
*/
ct = nf_ct_tuplehash_to_ctrack(h);
if (likely(atomic_inc_not_zero(&ct->ct_general.use))) {
if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
if (likely(nf_ct_key_equal(h, tuple, zone, net)))
goto found;
......@@ -908,7 +908,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
smp_wmb();
/* The caller holds a reference to this object */
atomic_set(&ct->ct_general.use, 2);
refcount_set(&ct->ct_general.use, 2);
__nf_conntrack_hash_insert(ct, hash, reply_hash);
nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
......@@ -959,7 +959,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
{
struct nf_conn_tstamp *tstamp;
atomic_inc(&ct->ct_general.use);
refcount_inc(&ct->ct_general.use);
ct->status |= IPS_CONFIRMED;
/* set conntrack timestamp, if enabled. */
......@@ -990,7 +990,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
nf_ct_acct_merge(ct, ctinfo, loser_ct);
nf_ct_add_to_dying_list(loser_ct);
nf_conntrack_put(&loser_ct->ct_general);
nf_ct_put(loser_ct);
nf_ct_set(skb, ct, ctinfo);
NF_CT_STAT_INC(net, clash_resolve);
......@@ -1352,7 +1352,7 @@ static unsigned int early_drop_list(struct net *net,
nf_ct_is_dying(tmp))
continue;
if (!atomic_inc_not_zero(&tmp->ct_general.use))
if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
/* kill only if still in same netns -- might have moved due to
......@@ -1470,7 +1470,7 @@ static void gc_worker(struct work_struct *work)
continue;
/* need to take reference to avoid possible races */
if (!atomic_inc_not_zero(&tmp->ct_general.use))
if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
if (gc_worker_skip_ct(tmp)) {
......@@ -1570,7 +1570,7 @@ __nf_conntrack_alloc(struct net *net,
/* Because we use RCU lookups, we set ct_general.use to zero before
* this is inserted in any list.
*/
atomic_set(&ct->ct_general.use, 0);
refcount_set(&ct->ct_general.use, 0);
return ct;
out:
atomic_dec(&cnet->count);
......@@ -1595,7 +1595,7 @@ void nf_conntrack_free(struct nf_conn *ct)
/* A freed object has refcnt == 0, that's
* the golden rule for SLAB_TYPESAFE_BY_RCU
*/
WARN_ON(atomic_read(&ct->ct_general.use) != 0);
WARN_ON(refcount_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
kmem_cache_free(nf_conntrack_cachep, ct);
......@@ -1687,8 +1687,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
if (!exp)
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
/* Now it is inserted into the unconfirmed list, bump refcount */
nf_conntrack_get(&ct->ct_general);
/* Now it is inserted into the unconfirmed list, set refcount to 1. */
refcount_set(&ct->ct_general.use, 1);
nf_ct_add_to_unconfirmed_list(ct);
local_bh_enable();
......@@ -1748,6 +1748,9 @@ resolve_normal_ct(struct nf_conn *tmpl,
return 0;
if (IS_ERR(h))
return PTR_ERR(h);
ct = nf_ct_tuplehash_to_ctrack(h);
ct->local_origin = state->hook == NF_INET_LOCAL_OUT;
}
ct = nf_ct_tuplehash_to_ctrack(h);
......@@ -1919,7 +1922,7 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_conntrack_put(&ct->ct_general);
nf_ct_put(ct);
skb->_nfct = 0;
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
if (ret == -NF_DROP)
......@@ -2083,9 +2086,9 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
const struct nf_nat_hook *nat_hook;
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
struct nf_nat_hook *nat_hook;
unsigned int status;
int dataoff;
u16 l3num;
......@@ -2298,7 +2301,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
return NULL;
found:
atomic_inc(&ct->ct_general.use);
refcount_inc(&ct->ct_general.use);
spin_unlock(lockp);
local_bh_enable();
return ct;
......@@ -2453,7 +2456,6 @@ static int kill_all(struct nf_conn *i, void *data)
void nf_conntrack_cleanup_start(void)
{
conntrack_gc_work.exiting = true;
RCU_INIT_POINTER(ip_ct_attach, NULL);
}
void nf_conntrack_cleanup_end(void)
......@@ -2771,16 +2773,15 @@ int nf_conntrack_init_start(void)
return ret;
}
static struct nf_ct_hook nf_conntrack_hook = {
static const struct nf_ct_hook nf_conntrack_hook = {
.update = nf_conntrack_update,
.destroy = destroy_conntrack,
.destroy = nf_ct_destroy,
.get_tuple_skb = nf_conntrack_get_tuple_skb,
.attach = nf_conntrack_attach,
};
void nf_conntrack_init_end(void)
{
/* For use by REJECT target */
RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook);
}
......
......@@ -203,12 +203,12 @@ nf_ct_find_expectation(struct net *net,
* about to invoke ->destroy(), or nf_ct_delete() via timeout
* or early_drop().
*
* The atomic_inc_not_zero() check tells: If that fails, we
* The refcount_inc_not_zero() check tells: If that fails, we
* know that the ct is being destroyed. If it succeeds, we
* can be sure the ct cannot disappear underneath.
*/
if (unlikely(nf_ct_is_dying(exp->master) ||
!atomic_inc_not_zero(&exp->master->ct_general.use)))
!refcount_inc_not_zero(&exp->master->ct_general.use)))
return NULL;
if (exp->flags & NF_CT_EXPECT_PERMANENT) {
......
......@@ -508,7 +508,7 @@ static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
{
if (nla_put_be32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use))))
if (nla_put_be32(skb, CTA_USE, htonl(refcount_read(&ct->ct_general.use))))
goto nla_put_failure;
return 0;
......@@ -1198,7 +1198,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
ct = nf_ct_tuplehash_to_ctrack(h);
if (nf_ct_is_expired(ct)) {
if (i < ARRAY_SIZE(nf_ct_evict) &&
atomic_inc_not_zero(&ct->ct_general.use))
refcount_inc_not_zero(&ct->ct_general.use))
nf_ct_evict[i++] = ct;
continue;
}
......@@ -1749,7 +1749,7 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
ct, dying, 0);
if (res < 0) {
if (!atomic_inc_not_zero(&ct->ct_general.use))
if (!refcount_inc_not_zero(&ct->ct_general.use))
continue;
cb->args[0] = cpu;
cb->args[1] = (unsigned long)ct;
......@@ -1820,7 +1820,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
const struct nlattr *attr)
__must_hold(RCU)
{
struct nf_nat_hook *nat_hook;
const struct nf_nat_hook *nat_hook;
int err;
nat_hook = rcu_dereference(nf_nat_hook);
......@@ -2922,7 +2922,7 @@ static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff);
}
static struct nfnl_ct_hook ctnetlink_glue_hook = {
static const struct nfnl_ct_hook ctnetlink_glue_hook = {
.build_size = ctnetlink_glue_build_size,
.build = ctnetlink_glue_build,
.parse = ctnetlink_glue_parse,
......
......@@ -303,7 +303,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
int ret = 0;
WARN_ON(!ct);
if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use)))
return 0;
if (nf_ct_should_gc(ct)) {
......@@ -370,7 +370,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
ct_show_delta_time(s, ct);
seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
seq_printf(s, "use=%u\n", refcount_read(&ct->ct_general.use));
if (seq_has_overflowed(s))
goto release;
......
......@@ -48,7 +48,7 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
struct flow_offload *flow;
if (unlikely(nf_ct_is_dying(ct) ||
!atomic_inc_not_zero(&ct->ct_general.use)))
!refcount_inc_not_zero(&ct->ct_general.use)))
return NULL;
flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
......
......@@ -54,8 +54,30 @@ static struct nf_flowtable_type flowtable_inet = {
.owner = THIS_MODULE,
};
static struct nf_flowtable_type flowtable_ipv4 = {
.family = NFPROTO_IPV4,
.init = nf_flow_table_init,
.setup = nf_flow_table_offload_setup,
.action = nf_flow_rule_route_ipv4,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ip_hook,
.owner = THIS_MODULE,
};
static struct nf_flowtable_type flowtable_ipv6 = {
.family = NFPROTO_IPV6,
.init = nf_flow_table_init,
.setup = nf_flow_table_offload_setup,
.action = nf_flow_rule_route_ipv6,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ipv6_hook,
.owner = THIS_MODULE,
};
static int __init nf_flow_inet_module_init(void)
{
nft_register_flowtable_type(&flowtable_ipv4);
nft_register_flowtable_type(&flowtable_ipv6);
nft_register_flowtable_type(&flowtable_inet);
return 0;
......@@ -64,6 +86,8 @@ static int __init nf_flow_inet_module_init(void)
static void __exit nf_flow_inet_module_exit(void)
{
nft_unregister_flowtable_type(&flowtable_inet);
nft_unregister_flowtable_type(&flowtable_ipv6);
nft_unregister_flowtable_type(&flowtable_ipv4);
}
module_init(nf_flow_inet_module_init);
......@@ -71,5 +95,7 @@ module_exit(nf_flow_inet_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */
MODULE_DESCRIPTION("Netfilter flow table mixed IPv4/IPv6 module");
......@@ -494,6 +494,38 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
goto another_round;
}
static bool tuple_force_port_remap(const struct nf_conntrack_tuple *tuple)
{
u16 sp, dp;
switch (tuple->dst.protonum) {
case IPPROTO_TCP:
sp = ntohs(tuple->src.u.tcp.port);
dp = ntohs(tuple->dst.u.tcp.port);
break;
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
sp = ntohs(tuple->src.u.udp.port);
dp = ntohs(tuple->dst.u.udp.port);
break;
default:
return false;
}
/* IANA: System port range: 1-1023,
* user port range: 1024-49151,
* private port range: 49152-65535.
*
* Linux default ephemeral port range is 32768-60999.
*
* Enforce port remapping if sport is significantly lower
* than dport to prevent NAT port shadowing, i.e.
* accidental match of 'new' inbound connection vs.
* existing outbound one.
*/
return sp < 16384 && dp >= 32768;
}
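A userspace restatement of the heuristic above, with two worked cases.
The cutoffs 16384 and 32768 come straight from the comment; the function
name and cases are mine.

#include <stdbool.h>
#include <stdio.h>

static bool force_remap(unsigned int sp, unsigned int dp)
{
	return sp < 16384 && dp >= 32768;
}

int main(void)
{
	/* UDP from sport 137 to an ephemeral dport: remap, otherwise a
	 * later inbound flow toward port 137 could match this mapping. */
	printf("%d\n", force_remap(137, 41000));	/* 1 */
	/* sport already in the ephemeral range: nothing to shadow. */
	printf("%d\n", force_remap(33000, 41000));	/* 0 */
	return 0;
}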
/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
* we change the source to map into the range. For NF_INET_PRE_ROUTING
* and NF_INET_LOCAL_OUT, we change the destination to map into the
......@@ -507,11 +539,17 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
bool random_port = range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL;
const struct nf_conntrack_zone *zone;
struct net *net = nf_ct_net(ct);
zone = nf_ct_zone(ct);
if (maniptype == NF_NAT_MANIP_SRC &&
!random_port &&
!ct->local_origin)
random_port = tuple_force_port_remap(orig_tuple);
/* 1) If this srcip/proto/src-proto-part is currently mapped,
* and that same mapping gives a unique tuple within the given
* range, use that.
......@@ -520,8 +558,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
* So far, we don't do local source mappings, so multiple
* manips not an issue.
*/
if (maniptype == NF_NAT_MANIP_SRC &&
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
if (maniptype == NF_NAT_MANIP_SRC && !random_port) {
/* try the original tuple first */
if (in_range(orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
......@@ -545,7 +582,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
*/
/* Only bother mapping if it's not already in range and unique */
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
if (!random_port) {
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
l4proto_in_range(tuple, maniptype,
......@@ -1130,7 +1167,7 @@ static struct pernet_operations nat_net_ops = {
.size = sizeof(struct nat_net),
};
static struct nf_nat_hook nat_hook = {
static const struct nf_nat_hook nat_hook = {
.parse_nat_setup = nfnetlink_parse_nat_setup,
#ifdef CONFIG_XFRM
.decode_session = __nf_nat_decode_session,
......
......@@ -12,6 +12,7 @@
struct masq_dev_work {
struct work_struct work;
struct net *net;
netns_tracker ns_tracker;
union nf_inet_addr addr;
int ifindex;
int (*iter)(struct nf_conn *i, void *data);
......@@ -82,7 +83,7 @@ static void iterate_cleanup_work(struct work_struct *work)
nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);
put_net(w->net);
put_net_track(w->net, &w->ns_tracker);
kfree(w);
atomic_dec(&masq_worker_count);
module_put(THIS_MODULE);
......@@ -119,6 +120,7 @@ static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
INIT_WORK(&w->work, iterate_cleanup_work);
w->ifindex = ifindex;
w->net = net;
netns_tracker_alloc(net, &w->ns_tracker, gfp_flags);
w->iter = iter;
if (addr)
w->addr = *addr;
......
......@@ -349,7 +349,6 @@ static int __net_init synproxy_net_init(struct net *net)
goto err2;
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
nf_conntrack_get(&ct->ct_general);
snet->tmpl = ct;
snet->stats = alloc_percpu(struct synproxy_stats);
......
......@@ -1747,16 +1747,16 @@ static void nft_chain_stats_replace(struct nft_trans *trans)
static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
{
struct nft_rule **g0 = rcu_dereference_raw(chain->rules_gen_0);
struct nft_rule **g1 = rcu_dereference_raw(chain->rules_gen_1);
struct nft_rule_blob *g0 = rcu_dereference_raw(chain->blob_gen_0);
struct nft_rule_blob *g1 = rcu_dereference_raw(chain->blob_gen_1);
if (g0 != g1)
kvfree(g1);
kvfree(g0);
/* should be NULL either via abort or via successful commit */
WARN_ON_ONCE(chain->rules_next);
kvfree(chain->rules_next);
WARN_ON_ONCE(chain->blob_next);
kvfree(chain->blob_next);
}
void nf_tables_chain_destroy(struct nft_ctx *ctx)
......@@ -2002,23 +2002,39 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
struct nft_rules_old {
struct rcu_head h;
struct nft_rule **start;
struct nft_rule_blob *blob;
};
static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain,
unsigned int alloc)
static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr)
{
if (alloc > INT_MAX)
struct nft_rule_dp *prule;
prule = (struct nft_rule_dp *)ptr;
prule->is_last = 1;
ptr += offsetof(struct nft_rule_dp, data);
/* blob size does not include the trailer rule */
}
static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size)
{
struct nft_rule_blob *blob;
/* size must include room for the last rule */
if (size < offsetof(struct nft_rule_dp, data))
return NULL;
size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rules_old);
if (size > INT_MAX)
return NULL;
alloc += 1; /* NULL, ends rules */
if (sizeof(struct nft_rule *) > INT_MAX / alloc)
blob = kvmalloc(size, GFP_KERNEL);
if (!blob)
return NULL;
alloc *= sizeof(struct nft_rule *);
alloc += sizeof(struct nft_rules_old);
blob->size = 0;
nft_last_rule(blob, blob->data);
return kvmalloc(alloc, GFP_KERNEL);
return blob;
}
static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
......@@ -2091,9 +2107,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
char name[NFT_NAME_MAXLEN];
struct nft_rule_blob *blob;
struct nft_trans *trans;
struct nft_chain *chain;
struct nft_rule **rules;
unsigned int data_size;
int err;
if (table->use == UINT_MAX)
......@@ -2178,15 +2195,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]);
}
rules = nf_tables_chain_alloc_rules(chain, 0);
if (!rules) {
data_size = offsetof(struct nft_rule_dp, data); /* last rule */
blob = nf_tables_chain_alloc_rules(data_size);
if (!blob) {
err = -ENOMEM;
goto err_destroy_chain;
}
*rules = NULL;
rcu_assign_pointer(chain->rules_gen_0, rules);
rcu_assign_pointer(chain->rules_gen_1, rules);
RCU_INIT_POINTER(chain->blob_gen_0, blob);
RCU_INIT_POINTER(chain->blob_gen_1, blob);
err = nf_tables_register_hook(net, table, chain);
if (err < 0)
......@@ -8241,32 +8258,83 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
{
const struct nft_expr *expr, *last;
struct nft_regs_track track = {};
unsigned int size, data_size;
void *data, *data_boundary;
struct nft_rule_dp *prule;
struct nft_rule *rule;
unsigned int alloc = 0;
int i;
/* already handled or inactive chain? */
if (chain->rules_next || !nft_is_active_next(net, chain))
if (chain->blob_next || !nft_is_active_next(net, chain))
return 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
i = 0;
list_for_each_entry_continue(rule, &chain->rules, list) {
if (nft_is_active_next(net, rule))
alloc++;
if (nft_is_active_next(net, rule)) {
data_size += sizeof(*prule) + rule->dlen;
if (data_size > INT_MAX)
return -ENOMEM;
}
}
data_size += offsetof(struct nft_rule_dp, data); /* last rule */
chain->rules_next = nf_tables_chain_alloc_rules(chain, alloc);
if (!chain->rules_next)
chain->blob_next = nf_tables_chain_alloc_rules(data_size);
if (!chain->blob_next)
return -ENOMEM;
data = (void *)chain->blob_next->data;
data_boundary = data + data_size;
size = 0;
list_for_each_entry_continue(rule, &chain->rules, list) {
if (nft_is_active_next(net, rule))
chain->rules_next[i++] = rule;
if (!nft_is_active_next(net, rule))
continue;
prule = (struct nft_rule_dp *)data;
data += offsetof(struct nft_rule_dp, data);
if (WARN_ON_ONCE(data > data_boundary))
return -ENOMEM;
size = 0;
track.last = last;
nft_rule_for_each_expr(expr, last, rule) {
track.cur = expr;
if (expr->ops->reduce &&
expr->ops->reduce(&track, expr)) {
expr = track.cur;
continue;
}
if (WARN_ON_ONCE(data + expr->ops->size > data_boundary))
return -ENOMEM;
memcpy(data + size, expr, expr->ops->size);
size += expr->ops->size;
}
if (WARN_ON_ONCE(size >= 1 << 12))
return -ENOMEM;
prule->handle = rule->handle;
prule->dlen = size;
prule->is_last = 0;
data += size;
size = 0;
chain->blob_next->size += (unsigned long)(data - (void *)prule);
}
chain->rules_next[i] = NULL;
prule = (struct nft_rule_dp *)data;
data += offsetof(struct nft_rule_dp, data);
if (WARN_ON_ONCE(data > data_boundary))
return -ENOMEM;
nft_last_rule(chain->blob_next, prule);
return 0;
}
......@@ -8280,8 +8348,8 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net)
if (trans->msg_type == NFT_MSG_NEWRULE ||
trans->msg_type == NFT_MSG_DELRULE) {
kvfree(chain->rules_next);
chain->rules_next = NULL;
kvfree(chain->blob_next);
chain->blob_next = NULL;
}
}
}
......@@ -8290,38 +8358,34 @@ static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h)
{
struct nft_rules_old *o = container_of(h, struct nft_rules_old, h);
kvfree(o->start);
kvfree(o->blob);
}
static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
static void nf_tables_commit_chain_free_rules_old(struct nft_rule_blob *blob)
{
struct nft_rule **r = rules;
struct nft_rules_old *old;
while (*r)
r++;
r++; /* rcu_head is after end marker */
old = (void *) r;
old->start = rules;
/* rcu_head is after end marker */
old = (void *)blob + sizeof(*blob) + blob->size;
old->blob = blob;
call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
}
static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
{
struct nft_rule **g0, **g1;
struct nft_rule_blob *g0, *g1;
bool next_genbit;
next_genbit = nft_gencursor_next(net);
g0 = rcu_dereference_protected(chain->rules_gen_0,
g0 = rcu_dereference_protected(chain->blob_gen_0,
lockdep_commit_lock_is_held(net));
g1 = rcu_dereference_protected(chain->rules_gen_1,
g1 = rcu_dereference_protected(chain->blob_gen_1,
lockdep_commit_lock_is_held(net));
/* No changes to this chain? */
if (chain->rules_next == NULL) {
if (chain->blob_next == NULL) {
/* chain had no change in last or next generation */
if (g0 == g1)
return;
......@@ -8330,10 +8394,10 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
* one uses same rules as current generation.
*/
if (next_genbit) {
rcu_assign_pointer(chain->rules_gen_1, g0);
rcu_assign_pointer(chain->blob_gen_1, g0);
nf_tables_commit_chain_free_rules_old(g1);
} else {
rcu_assign_pointer(chain->rules_gen_0, g1);
rcu_assign_pointer(chain->blob_gen_0, g1);
nf_tables_commit_chain_free_rules_old(g0);
}
......@@ -8341,11 +8405,11 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
}
if (next_genbit)
rcu_assign_pointer(chain->rules_gen_1, chain->rules_next);
rcu_assign_pointer(chain->blob_gen_1, chain->blob_next);
else
rcu_assign_pointer(chain->rules_gen_0, chain->rules_next);
rcu_assign_pointer(chain->blob_gen_0, chain->blob_next);
chain->rules_next = NULL;
chain->blob_next = NULL;
if (g0 == g1)
return;
......
......@@ -38,7 +38,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info,
static inline void nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
const struct nft_rule *rule,
const struct nft_rule_dp *rule,
enum nft_trace_types type)
{
if (static_branch_unlikely(&nft_trace_enabled)) {
......@@ -67,6 +67,36 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
const struct nft_chain *chain,
const struct nft_regs *regs)
{
enum nft_trace_types type;
switch (regs->verdict.code) {
case NFT_CONTINUE:
case NFT_RETURN:
type = NFT_TRACETYPE_RETURN;
break;
default:
type = NFT_TRACETYPE_RULE;
break;
}
__nft_trace_packet(info, chain, type);
}
static inline void nft_trace_verdict(struct nft_traceinfo *info,
const struct nft_chain *chain,
const struct nft_rule_dp *rule,
const struct nft_regs *regs)
{
if (static_branch_unlikely(&nft_trace_enabled)) {
info->rule = rule;
__nft_trace_verdict(info, chain, regs);
}
}
static bool nft_payload_fast_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
......@@ -110,7 +140,6 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
base_chain = nft_base_chain(chain);
rcu_read_lock();
pstats = READ_ONCE(base_chain->stats);
if (pstats) {
local_bh_disable();
......@@ -121,12 +150,12 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
u64_stats_update_end(&stats->syncp);
local_bh_enable();
}
rcu_read_unlock();
}
struct nft_jumpstack {
const struct nft_chain *chain;
struct nft_rule *const *rules;
const struct nft_chain *chain;
const struct nft_rule_dp *rule;
const struct nft_rule_dp *last_rule;
};
static void expr_call_ops_eval(const struct nft_expr *expr,
......@@ -141,6 +170,7 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
X(e, nft_payload_eval);
X(e, nft_cmp_eval);
X(e, nft_counter_eval);
X(e, nft_meta_get_eval);
X(e, nft_lookup_eval);
X(e, nft_range_eval);
......@@ -154,18 +184,28 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
expr->ops->eval(expr, regs, pkt);
}
#define nft_rule_expr_first(rule) (struct nft_expr *)&rule->data[0]
#define nft_rule_expr_next(expr) ((void *)expr) + expr->ops->size
#define nft_rule_expr_last(rule) (struct nft_expr *)&rule->data[rule->dlen]
#define nft_rule_next(rule) (void *)rule + sizeof(*rule) + rule->dlen
#define nft_rule_dp_for_each_expr(expr, last, rule) \
for ((expr) = nft_rule_expr_first(rule), (last) = nft_rule_expr_last(rule); \
(expr) != (last); \
(expr) = nft_rule_expr_next(expr))
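These macros replace the old NULL-terminated rule-pointer walk with a
linear scan over length-prefixed records. A minimal userspace model of
that layout (the header fields mirror nft_rule_dp; everything else is
illustrative):

#include <stdint.h>
#include <stdio.h>

struct rule_hdr {		/* models nft_rule_dp */
	uint64_t is_last : 1,
		 dlen    : 12,
		 handle  : 42;
	unsigned char data[];	/* dlen bytes of inlined expressions */
};

static struct rule_hdr *rule_next(struct rule_hdr *r)
{
	return (struct rule_hdr *)((unsigned char *)r + sizeof(*r) + r->dlen);
}

int main(void)
{
	uint64_t storage[8] = { 0 };	/* keeps headers u64-aligned */
	struct rule_hdr *r = (struct rule_hdr *)storage;

	r->handle = 7;			/* one rule with 8 bytes of data... */
	r->dlen = 8;
	rule_next(r)->is_last = 1;	/* ...then the is_last trailer */

	for (struct rule_hdr *c = r; !c->is_last; c = rule_next(c))
		printf("handle=%llu dlen=%u\n",
		       (unsigned long long)c->handle, (unsigned)c->dlen);
	return 0;
}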
unsigned int
nft_do_chain(struct nft_pktinfo *pkt, void *priv)
{
const struct nft_chain *chain = priv, *basechain = chain;
const struct nft_rule_dp *rule, *last_rule;
const struct net *net = nft_net(pkt);
struct nft_rule *const *rules;
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
bool genbit = READ_ONCE(net->nft.gencursor);
struct nft_rule_blob *blob;
struct nft_traceinfo info;
info.trace = false;
......@@ -173,16 +213,16 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
nft_trace_init(&info, pkt, &regs.verdict, basechain);
do_chain:
if (genbit)
rules = rcu_dereference(chain->rules_gen_1);
blob = rcu_dereference(chain->blob_gen_1);
else
rules = rcu_dereference(chain->rules_gen_0);
blob = rcu_dereference(chain->blob_gen_0);
rule = (struct nft_rule_dp *)blob->data;
last_rule = (void *)blob->data + blob->size;
next_rule:
rule = *rules;
regs.verdict.code = NFT_CONTINUE;
for (; *rules ; rules++) {
rule = *rules;
nft_rule_for_each_expr(expr, last, rule) {
for (; rule < last_rule; rule = nft_rule_next(rule)) {
nft_rule_dp_for_each_expr(expr, last, rule) {
if (expr->ops == &nft_cmp_fast_ops)
nft_cmp_fast_eval(expr, &regs);
else if (expr->ops == &nft_bitwise_fast_ops)
......@@ -207,13 +247,13 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
break;
}
nft_trace_verdict(&info, chain, rule, &regs);
switch (regs.verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
case NF_STOLEN:
nft_trace_packet(&info, chain, rule,
NFT_TRACETYPE_RULE);
return regs.verdict.code;
}
......@@ -222,28 +262,25 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE))
return NF_DROP;
jumpstack[stackptr].chain = chain;
jumpstack[stackptr].rules = rules + 1;
jumpstack[stackptr].rule = nft_rule_next(rule);
jumpstack[stackptr].last_rule = last_rule;
stackptr++;
fallthrough;
case NFT_GOTO:
nft_trace_packet(&info, chain, rule,
NFT_TRACETYPE_RULE);
chain = regs.verdict.chain;
goto do_chain;
case NFT_CONTINUE:
case NFT_RETURN:
nft_trace_packet(&info, chain, rule,
NFT_TRACETYPE_RETURN);
break;
default:
WARN_ON(1);
WARN_ON_ONCE(1);
}
if (stackptr > 0) {
stackptr--;
chain = jumpstack[stackptr].chain;
rules = jumpstack[stackptr].rules;
rule = jumpstack[stackptr].rule;
last_rule = jumpstack[stackptr].last_rule;
goto next_rule;
}
......@@ -269,18 +306,22 @@ static struct nft_expr_type *nft_basic_types[] = {
&nft_rt_type,
&nft_exthdr_type,
&nft_last_type,
&nft_counter_type,
};
static struct nft_object_type *nft_basic_objects[] = {
#ifdef CONFIG_NETWORK_SECMARK
&nft_secmark_obj_type,
#endif
&nft_counter_obj_type,
};
int __init nf_tables_core_module_init(void)
{
int err, i, j = 0;
nft_counter_init_seqcount();
for (i = 0; i < ARRAY_SIZE(nft_basic_objects); i++) {
err = nft_register_obj(nft_basic_objects[i]);
if (err)
......
......@@ -142,7 +142,7 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
const struct nft_traceinfo *info)
{
if (!info->rule)
if (!info->rule || info->rule->is_last)
return 0;
/* a continue verdict with ->type == RETURN means that this is
......
......@@ -66,6 +66,7 @@ struct nfulnl_instance {
struct sk_buff *skb; /* pre-allocatd skb */
struct timer_list timer;
struct net *net;
netns_tracker ns_tracker;
struct user_namespace *peer_user_ns; /* User namespace of the peer process */
u32 peer_portid; /* PORTID of the peer process */
......@@ -140,7 +141,7 @@ static void nfulnl_instance_free_rcu(struct rcu_head *head)
struct nfulnl_instance *inst =
container_of(head, struct nfulnl_instance, rcu);
put_net(inst->net);
put_net_track(inst->net, &inst->ns_tracker);
kfree(inst);
module_put(THIS_MODULE);
}
......@@ -187,7 +188,7 @@ instance_create(struct net *net, u_int16_t group_num,
timer_setup(&inst->timer, nfulnl_timer, 0);
inst->net = get_net(net);
inst->net = get_net_track(net, &inst->ns_tracker, GFP_ATOMIC);
inst->peer_user_ns = user_ns;
inst->peer_portid = portid;
inst->group_num = group_num;
......
......@@ -225,7 +225,7 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
struct nf_ct_hook *ct_hook;
const struct nf_ct_hook *ct_hook;
int err;
if (verdict == NF_ACCEPT ||
......@@ -388,7 +388,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct net_device *outdev;
struct nf_conn *ct = NULL;
enum ip_conntrack_info ctinfo = 0;
struct nfnl_ct_hook *nfnl_ct;
const struct nfnl_ct_hook *nfnl_ct;
bool csum_verify;
char *secdata = NULL;
u32 seclen = 0;
......@@ -1104,7 +1104,7 @@ static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
return 0;
}
static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct,
static struct nf_conn *nfqnl_ct_parse(const struct nfnl_ct_hook *nfnl_ct,
const struct nlmsghdr *nlh,
const struct nlattr * const nfqa[],
struct nf_queue_entry *entry,
......@@ -1171,11 +1171,11 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
{
struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
u_int16_t queue_num = ntohs(info->nfmsg->res_id);
const struct nfnl_ct_hook *nfnl_ct;
struct nfqnl_msg_verdict_hdr *vhdr;
enum ip_conntrack_info ctinfo;
struct nfqnl_instance *queue;
struct nf_queue_entry *entry;
struct nfnl_ct_hook *nfnl_ct;
struct nf_conn *ct = NULL;
unsigned int verdict;
int err;
......
......@@ -278,12 +278,52 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
return 0;
}
static bool nft_bitwise_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
const struct nft_bitwise *bitwise;
if (!track->regs[priv->sreg].selector)
return false;
bitwise = nft_expr_priv(expr);
if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
track->regs[priv->dreg].bitwise &&
track->regs[priv->dreg].bitwise->ops == expr->ops &&
priv->sreg == bitwise->sreg &&
priv->dreg == bitwise->dreg &&
priv->op == bitwise->op &&
priv->len == bitwise->len &&
!memcmp(&priv->mask, &bitwise->mask, sizeof(priv->mask)) &&
!memcmp(&priv->xor, &bitwise->xor, sizeof(priv->xor)) &&
!memcmp(&priv->data, &bitwise->data, sizeof(priv->data))) {
track->cur = expr;
return true;
}
if (track->regs[priv->sreg].bitwise) {
track->regs[priv->dreg].selector = NULL;
track->regs[priv->dreg].bitwise = NULL;
return false;
}
if (priv->sreg != priv->dreg) {
track->regs[priv->dreg].selector =
track->regs[priv->sreg].selector;
}
track->regs[priv->dreg].bitwise = expr;
return false;
}
static const struct nft_expr_ops nft_bitwise_ops = {
.type = &nft_bitwise_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
.eval = nft_bitwise_eval,
.init = nft_bitwise_init,
.dump = nft_bitwise_dump,
.reduce = nft_bitwise_reduce,
.offload = nft_bitwise_offload,
};
......@@ -385,12 +425,49 @@ static int nft_bitwise_fast_offload(struct nft_offload_ctx *ctx,
return 0;
}
static bool nft_bitwise_fast_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
const struct nft_bitwise_fast_expr *bitwise;
if (!track->regs[priv->sreg].selector)
return false;
bitwise = nft_expr_priv(expr);
if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
track->regs[priv->dreg].bitwise &&
track->regs[priv->dreg].bitwise->ops == expr->ops &&
priv->sreg == bitwise->sreg &&
priv->dreg == bitwise->dreg &&
priv->mask == bitwise->mask &&
priv->xor == bitwise->xor) {
track->cur = expr;
return true;
}
if (track->regs[priv->sreg].bitwise) {
track->regs[priv->dreg].selector = NULL;
track->regs[priv->dreg].bitwise = NULL;
return false;
}
if (priv->sreg != priv->dreg) {
track->regs[priv->dreg].selector =
track->regs[priv->sreg].selector;
}
track->regs[priv->dreg].bitwise = expr;
return false;
}
const struct nft_expr_ops nft_bitwise_fast_ops = {
.type = &nft_bitwise_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise_fast_expr)),
.eval = NULL, /* inlined */
.init = nft_bitwise_fast_init,
.dump = nft_bitwise_fast_dump,
.reduce = nft_bitwise_fast_reduce,
.offload = nft_bitwise_fast_offload,
};
......@@ -427,3 +504,21 @@ struct nft_expr_type nft_bitwise_type __read_mostly = {
.maxattr = NFTA_BITWISE_MAX,
.owner = THIS_MODULE,
};
bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
const struct nft_expr *expr)
{
const struct nft_expr *last = track->last;
const struct nft_expr *next;
if (expr == last)
return false;
next = nft_expr_next(expr);
if (next->ops == &nft_bitwise_ops)
return nft_bitwise_reduce(track, next);
else if (next->ops == &nft_bitwise_fast_ops)
return nft_bitwise_fast_reduce(track, next);
return false;
}
......@@ -14,7 +14,7 @@
#include <net/netfilter/nf_conntrack_zones.h>
struct nft_connlimit {
struct nf_conncount_list list;
struct nf_conncount_list *list;
u32 limit;
bool invert;
};
......@@ -43,12 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
return;
}
if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) {
if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) {
regs->verdict.code = NF_DROP;
return;
}
count = priv->list.count;
count = priv->list->count;
if ((count > priv->limit) ^ priv->invert) {
regs->verdict.code = NFT_BREAK;
......@@ -76,7 +76,11 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
invert = true;
}
nf_conncount_list_init(&priv->list);
priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
if (!priv->list)
return -ENOMEM;
nf_conncount_list_init(priv->list);
priv->limit = limit;
priv->invert = invert;
......@@ -87,7 +91,8 @@ static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
struct nft_connlimit *priv)
{
nf_ct_netns_put(ctx->net, ctx->family);
nf_conncount_cache_free(&priv->list);
nf_conncount_cache_free(priv->list);
kfree(priv->list);
}
static int nft_connlimit_do_dump(struct sk_buff *skb,
......@@ -200,7 +205,11 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_connlimit *priv_dst = nft_expr_priv(dst);
struct nft_connlimit *priv_src = nft_expr_priv(src);
nf_conncount_list_init(&priv_dst->list);
priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
if (!priv_dst->list)
return -ENOMEM;
nf_conncount_list_init(priv_dst->list);
priv_dst->limit = priv_src->limit;
priv_dst->invert = priv_src->invert;
......@@ -212,7 +221,8 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
{
struct nft_connlimit *priv = nft_expr_priv(expr);
nf_conncount_cache_free(&priv->list);
nf_conncount_cache_free(priv->list);
kfree(priv->list);
}
static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
......@@ -221,7 +231,7 @@ static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
bool ret;
local_bh_disable();
ret = nf_conncount_gc_list(net, &priv->list);
ret = nf_conncount_gc_list(net, priv->list);
local_bh_enable();
return ret;
......
......@@ -13,6 +13,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables_offload.h>
struct nft_counter {
......@@ -174,7 +175,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
static struct nft_object_type nft_counter_obj_type;
struct nft_object_type nft_counter_obj_type;
static const struct nft_object_ops nft_counter_obj_ops = {
.type = &nft_counter_obj_type,
.size = sizeof(struct nft_counter_percpu_priv),
......@@ -184,7 +185,7 @@ static const struct nft_object_ops nft_counter_obj_ops = {
.dump = nft_counter_obj_dump,
};
static struct nft_object_type nft_counter_obj_type __read_mostly = {
struct nft_object_type nft_counter_obj_type __read_mostly = {
.type = NFT_OBJECT_COUNTER,
.ops = &nft_counter_obj_ops,
.maxattr = NFTA_COUNTER_MAX,
......@@ -192,9 +193,8 @@ static struct nft_object_type nft_counter_obj_type __read_mostly = {
.owner = THIS_MODULE,
};
static void nft_counter_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
......@@ -275,7 +275,15 @@ static void nft_counter_offload_stats(struct nft_expr *expr,
preempt_enable();
}
static struct nft_expr_type nft_counter_type;
void nft_counter_init_seqcount(void)
{
int cpu;
for_each_possible_cpu(cpu)
seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
}
struct nft_expr_type nft_counter_type;
static const struct nft_expr_ops nft_counter_ops = {
.type = &nft_counter_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)),
......@@ -289,7 +297,7 @@ static const struct nft_expr_ops nft_counter_ops = {
.offload_stats = nft_counter_offload_stats,
};
static struct nft_expr_type nft_counter_type __read_mostly = {
struct nft_expr_type nft_counter_type __read_mostly = {
.name = "counter",
.ops = &nft_counter_ops,
.policy = nft_counter_policy,
......@@ -297,39 +305,3 @@ static struct nft_expr_type nft_counter_type __read_mostly = {
.flags = NFT_EXPR_STATEFUL,
.owner = THIS_MODULE,
};
static int __init nft_counter_module_init(void)
{
int cpu, err;
for_each_possible_cpu(cpu)
seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
err = nft_register_obj(&nft_counter_obj_type);
if (err < 0)
return err;
err = nft_register_expr(&nft_counter_type);
if (err < 0)
goto err1;
return 0;
err1:
nft_unregister_obj(&nft_counter_obj_type);
return err;
}
static void __exit nft_counter_module_exit(void)
{
nft_unregister_expr(&nft_counter_type);
nft_unregister_obj(&nft_counter_obj_type);
}
module_init(nft_counter_module_init);
module_exit(nft_counter_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("counter");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER);
MODULE_DESCRIPTION("nftables counter rule support");
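With counter support now built-in, the module init/exit boilerplate above goes away: the expression and object types lose their static qualifier, and the per-CPU seqcount setup is exposed as nft_counter_init_seqcount(). Presumably the nf_tables core init path now performs the registration itself; the exact call site is not part of this excerpt, but it plausibly looks along these lines (hedged sketch, hypothetical function name):

static int __init nf_tables_core_init(void)
{
	int err;

	nft_counter_init_seqcount();

	err = nft_register_obj(&nft_counter_obj_type);
	if (err < 0)
		return err;

	err = nft_register_expr(&nft_counter_type);
	if (err < 0) {
		nft_unregister_obj(&nft_counter_obj_type);
		return err;
	}

	/* ... remaining nf_tables core initialisation ... */
	return 0;
}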
......@@ -259,7 +259,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
ct = this_cpu_read(nft_ct_pcpu_template);
if (likely(atomic_read(&ct->ct_general.use) == 1)) {
if (likely(refcount_read(&ct->ct_general.use) == 1)) {
nf_ct_zone_add(ct, &zone);
} else {
/* previous skb got queued to userspace */
......@@ -270,7 +270,6 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
}
}
atomic_inc(&ct->ct_general.use);
nf_ct_set(skb, ct, IP_CT_NEW);
}
#endif
......@@ -375,7 +374,6 @@ static bool nft_ct_tmpl_alloc_pcpu(void)
return false;
}
atomic_set(&tmp->ct_general.use, 1);
per_cpu(nft_ct_pcpu_template, cpu) = tmp;
}
......
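These nft_ct hunks are part of the tree-wide atomic_t to refcount_t conversion for ct->ct_general.use. refcount_t keeps the same fast path but WARNs and saturates on patterns the plain atomic API lets through silently (increment from zero, overflow); the dropped atomic_set()/atomic_inc() lines reflect that, per this series, templates now come out of the allocator with their reference count already initialised. The mapping, in reduced form:

#include <linux/refcount.h>

refcount_t use;

refcount_set(&use, 1);			/* replaces atomic_set(&use, 1) */
refcount_read(&use);			/* replaces atomic_read(&use) */
refcount_inc(&use);			/* replaces atomic_inc(); WARNs if the
					 * count was 0 or saturated */
if (refcount_dec_and_test(&use))	/* replaces atomic_dec_and_test() */
	pr_info("last reference dropped\n");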
......@@ -8,9 +8,13 @@
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
struct nft_last {
unsigned long jiffies;
unsigned int set;
};
struct nft_last_priv {
unsigned long last_jiffies;
unsigned int last_set;
struct nft_last *last;
};
static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = {
......@@ -22,47 +26,55 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_last_priv *priv = nft_expr_priv(expr);
struct nft_last *last;
u64 last_jiffies;
u32 last_set = 0;
int err;
if (tb[NFTA_LAST_SET]) {
last_set = ntohl(nla_get_be32(tb[NFTA_LAST_SET]));
if (last_set == 1)
priv->last_set = 1;
}
last = kzalloc(sizeof(*last), GFP_KERNEL);
if (!last)
return -ENOMEM;
if (tb[NFTA_LAST_SET])
last->set = ntohl(nla_get_be32(tb[NFTA_LAST_SET]));
if (last_set && tb[NFTA_LAST_MSECS]) {
if (last->set && tb[NFTA_LAST_MSECS]) {
err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies);
if (err < 0)
return err;
goto err;
priv->last_jiffies = jiffies - (unsigned long)last_jiffies;
last->jiffies = jiffies - (unsigned long)last_jiffies;
}
priv->last = last;
return 0;
err:
kfree(last);
return err;
}
static void nft_last_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt)
{
struct nft_last_priv *priv = nft_expr_priv(expr);
struct nft_last *last = priv->last;
if (READ_ONCE(priv->last_jiffies) != jiffies)
WRITE_ONCE(priv->last_jiffies, jiffies);
if (READ_ONCE(priv->last_set) == 0)
WRITE_ONCE(priv->last_set, 1);
if (READ_ONCE(last->jiffies) != jiffies)
WRITE_ONCE(last->jiffies, jiffies);
if (READ_ONCE(last->set) == 0)
WRITE_ONCE(last->set, 1);
}
static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_last_priv *priv = nft_expr_priv(expr);
unsigned long last_jiffies = READ_ONCE(priv->last_jiffies);
u32 last_set = READ_ONCE(priv->last_set);
struct nft_last *last = priv->last;
unsigned long last_jiffies = READ_ONCE(last->jiffies);
u32 last_set = READ_ONCE(last->set);
__be64 msecs;
if (time_before(jiffies, last_jiffies)) {
WRITE_ONCE(priv->last_set, 0);
WRITE_ONCE(last->set, 0);
last_set = 0;
}
......@@ -81,11 +93,32 @@ static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
return -1;
}
static void nft_last_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_last_priv *priv = nft_expr_priv(expr);
kfree(priv->last);
}
static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
{
struct nft_last_priv *priv_dst = nft_expr_priv(dst);
priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
if (!priv_dst->last)
return -ENOMEM;
return 0;
}
static const struct nft_expr_ops nft_last_ops = {
.type = &nft_last_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_last_priv)),
.eval = nft_last_eval,
.init = nft_last_init,
.destroy = nft_last_destroy,
.clone = nft_last_clone,
.dump = nft_last_dump,
};
......
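nft_last_eval() runs lockless in the packet path, so the eval/dump code above uses READ_ONCE()/WRITE_ONCE() to prevent torn or compiler-cached accesses to the shared fields, and checks the current value before storing so the cache line is not dirtied on every packet. The idiom in isolation, with illustrative names:

#include <linux/compiler.h>
#include <linux/jiffies.h>

static unsigned long shared_stamp;	/* updated from many CPUs */

static void touch_stamp(void)
{
	/* write only on change: back-to-back packets within the same
	 * jiffy leave the cache line clean and shared */
	if (READ_ONCE(shared_stamp) != jiffies)
		WRITE_ONCE(shared_stamp, jiffies);
}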
......@@ -750,16 +750,63 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
return 0;
}
static bool nft_meta_get_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct nft_meta *meta;
if (!track->regs[priv->dreg].selector ||
track->regs[priv->dreg].selector->ops != expr->ops) {
track->regs[priv->dreg].selector = expr;
track->regs[priv->dreg].bitwise = NULL;
return false;
}
meta = nft_expr_priv(track->regs[priv->dreg].selector);
if (priv->key != meta->key ||
priv->dreg != meta->dreg) {
track->regs[priv->dreg].selector = expr;
track->regs[priv->dreg].bitwise = NULL;
return false;
}
if (!track->regs[priv->dreg].bitwise)
return true;
return nft_expr_reduce_bitwise(track, expr);
}
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_get_eval,
.init = nft_meta_get_init,
.dump = nft_meta_get_dump,
.reduce = nft_meta_get_reduce,
.validate = nft_meta_get_validate,
.offload = nft_meta_get_offload,
};
static bool nft_meta_set_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
int i;
for (i = 0; i < NFT_REG32_NUM; i++) {
if (!track->regs[i].selector)
continue;
if (track->regs[i].selector->ops != &nft_meta_get_ops)
continue;
track->regs[i].selector = NULL;
track->regs[i].bitwise = NULL;
}
return false;
}
static const struct nft_expr_ops nft_meta_set_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
......@@ -767,6 +814,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
.init = nft_meta_set_init,
.destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
.reduce = nft_meta_set_reduce,
.validate = nft_meta_set_validate,
};
......
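The new ->reduce callbacks hook into the register tracking infrastructure added earlier in this series: the core walks a rule's expressions and records, per 32-bit register, which expression last filled it. A get expression that finds an identical selector already in its destination register (same ops, same meta key and dreg above) reports the store as redundant so it can be skipped; a set expression has side effects outside the registers, so it invalidates every register whose cached selector it might affect and always returns false. The tracking state is roughly shaped like this (a sketch, not copied verbatim from the series):

struct nft_regs_track {
	struct {
		const struct nft_expr *selector; /* expr that last filled the reg */
		const struct nft_expr *bitwise;  /* post-load mask/xor, if any */
	} regs[NFT_REG32_NUM];
};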
......@@ -18,7 +18,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
struct nft_ng_inc {
u8 dreg;
u32 modulus;
atomic_t counter;
atomic_t *counter;
u32 offset;
};
......@@ -27,9 +27,9 @@ static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
u32 nval, oval;
do {
oval = atomic_read(&priv->counter);
oval = atomic_read(priv->counter);
nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
} while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
} while (atomic_cmpxchg(priv->counter, oval, nval) != oval);
return nval + priv->offset;
}
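nft_ng_inc_gen() above is a lock-free modular counter: each CPU computes the successor (wrapping to 0 at the modulus) and publishes it only if the counter is still at the value it read, retrying after losing a race. The same idiom, self-contained and with hypothetical names:

#include <linux/atomic.h>
#include <linux/types.h>

/* Atomically advance *ctr modulo mod, returning the new value. */
static u32 inc_modulo(atomic_t *ctr, u32 mod)
{
	u32 oval, nval;

	do {
		oval = atomic_read(ctr);
		nval = (oval + 1 < mod) ? oval + 1 : 0;
		/* cmpxchg fails and we retry if another CPU moved ctr
		 * between our read and the update */
	} while (atomic_cmpxchg(ctr, oval, nval) != oval);

	return nval;
}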
......@@ -55,6 +55,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_ng_inc *priv = nft_expr_priv(expr);
int err;
if (tb[NFTA_NG_OFFSET])
priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
......@@ -66,10 +67,22 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
atomic_set(&priv->counter, priv->modulus - 1);
priv->counter = kmalloc(sizeof(*priv->counter), GFP_KERNEL);
if (!priv->counter)
return -ENOMEM;
return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, sizeof(u32));
atomic_set(priv->counter, priv->modulus - 1);
err = nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, sizeof(u32));
if (err < 0)
goto err;
return 0;
err:
kfree(priv->counter);
return err;
}
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
......@@ -98,6 +111,14 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
priv->offset);
}
static void nft_ng_inc_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
const struct nft_ng_inc *priv = nft_expr_priv(expr);
kfree(priv->counter);
}
struct nft_ng_random {
u8 dreg;
u32 modulus;
......@@ -157,6 +178,7 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
.eval = nft_ng_inc_eval,
.init = nft_ng_inc_init,
.destroy = nft_ng_inc_destroy,
.dump = nft_ng_inc_dump,
};
......
......@@ -157,7 +157,8 @@ void nft_payload_eval(const struct nft_expr *expr,
goto err;
break;
default:
BUG();
WARN_ON_ONCE(1);
goto err;
}
offset += priv->offset;
......@@ -209,6 +210,34 @@ static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
return -1;
}
static bool nft_payload_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
const struct nft_payload *priv = nft_expr_priv(expr);
const struct nft_payload *payload;
if (!track->regs[priv->dreg].selector ||
track->regs[priv->dreg].selector->ops != expr->ops) {
track->regs[priv->dreg].selector = expr;
track->regs[priv->dreg].bitwise = NULL;
return false;
}
payload = nft_expr_priv(track->regs[priv->dreg].selector);
if (priv->base != payload->base ||
priv->offset != payload->offset ||
priv->len != payload->len) {
track->regs[priv->dreg].selector = expr;
track->regs[priv->dreg].bitwise = NULL;
return false;
}
if (!track->regs[priv->dreg].bitwise)
return true;
return nft_expr_reduce_bitwise(track, expr);
}
static bool nft_payload_offload_mask(struct nft_offload_reg *reg,
u32 priv_len, u32 field_len)
{
......@@ -512,6 +541,7 @@ static const struct nft_expr_ops nft_payload_ops = {
.eval = nft_payload_eval,
.init = nft_payload_init,
.dump = nft_payload_dump,
.reduce = nft_payload_reduce,
.offload = nft_payload_offload,
};
......@@ -521,6 +551,7 @@ const struct nft_expr_ops nft_payload_fast_ops = {
.eval = nft_payload_eval,
.init = nft_payload_init,
.dump = nft_payload_dump,
.reduce = nft_payload_reduce,
.offload = nft_payload_offload,
};
......@@ -664,7 +695,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
goto err;
break;
default:
BUG();
WARN_ON_ONCE(1);
goto err;
}
csum_offset = offset + priv->csum_offset;
......@@ -766,12 +798,33 @@ static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr
return -1;
}
static bool nft_payload_set_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
int i;
for (i = 0; i < NFT_REG32_NUM; i++) {
if (!track->regs[i].selector)
continue;
if (track->regs[i].selector->ops != &nft_payload_ops &&
track->regs[i].selector->ops != &nft_payload_fast_ops)
continue;
track->regs[i].selector = NULL;
track->regs[i].bitwise = NULL;
}
return false;
}
static const struct nft_expr_ops nft_payload_set_ops = {
.type = &nft_payload_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)),
.eval = nft_payload_set_eval,
.init = nft_payload_set_init,
.dump = nft_payload_set_dump,
.reduce = nft_payload_set_reduce,
};
static const struct nft_expr_ops *
......
......@@ -15,13 +15,13 @@
struct nft_quota {
atomic64_t quota;
unsigned long flags;
atomic64_t consumed;
atomic64_t *consumed;
};
static inline bool nft_overquota(struct nft_quota *priv,
const struct sk_buff *skb)
{
return atomic64_add_return(skb->len, &priv->consumed) >=
return atomic64_add_return(skb->len, priv->consumed) >=
atomic64_read(&priv->quota);
}
......@@ -90,13 +90,23 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
return -EOPNOTSUPP;
}
priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
if (!priv->consumed)
return -ENOMEM;
atomic64_set(&priv->quota, quota);
priv->flags = flags;
atomic64_set(&priv->consumed, consumed);
atomic64_set(priv->consumed, consumed);
return 0;
}
static void nft_quota_do_destroy(const struct nft_ctx *ctx,
struct nft_quota *priv)
{
kfree(priv->consumed);
}
static int nft_quota_obj_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[],
struct nft_object *obj)
......@@ -128,7 +138,7 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
* that we see, don't go over the quota boundary in what we send to
* userspace.
*/
consumed = atomic64_read(&priv->consumed);
consumed = atomic64_read(priv->consumed);
quota = atomic64_read(&priv->quota);
if (consumed >= quota) {
consumed_cap = quota;
......@@ -145,7 +155,7 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
goto nla_put_failure;
if (reset) {
atomic64_sub(consumed, &priv->consumed);
atomic64_sub(consumed, priv->consumed);
clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags);
}
return 0;
......@@ -162,11 +172,20 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
return nft_quota_do_dump(skb, priv, reset);
}
static void nft_quota_obj_destroy(const struct nft_ctx *ctx,
struct nft_object *obj)
{
struct nft_quota *priv = nft_obj_data(obj);
nft_quota_do_destroy(ctx, priv);
}
static struct nft_object_type nft_quota_obj_type;
static const struct nft_object_ops nft_quota_obj_ops = {
.type = &nft_quota_obj_type,
.size = sizeof(struct nft_quota),
.init = nft_quota_obj_init,
.destroy = nft_quota_obj_destroy,
.eval = nft_quota_obj_eval,
.dump = nft_quota_obj_dump,
.update = nft_quota_obj_update,
......@@ -205,12 +224,35 @@ static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
return nft_quota_do_dump(skb, priv, false);
}
static void nft_quota_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_quota *priv = nft_expr_priv(expr);
nft_quota_do_destroy(ctx, priv);
}
static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
{
struct nft_quota *priv_dst = nft_expr_priv(dst);
priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
if (!priv_dst->consumed)
return -ENOMEM;
atomic64_set(priv_dst->consumed, 0);
return 0;
}
static struct nft_expr_type nft_quota_type;
static const struct nft_expr_ops nft_quota_ops = {
.type = &nft_quota_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_quota)),
.eval = nft_quota_eval,
.init = nft_quota_init,
.destroy = nft_quota_destroy,
.clone = nft_quota_clone,
.dump = nft_quota_dump,
};
......
......@@ -1048,11 +1048,9 @@ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
struct nft_pipapo_field *f, int offset,
const u8 *pkt, bool first, bool last)
{
unsigned long *lt = f->lt, bsize = f->bsize;
unsigned long bsize = f->bsize;
int i, ret = -1, b;
lt += offset * NFT_PIPAPO_LONGS_PER_M256;
if (first)
memset(map, 0xff, bsize * sizeof(*map));
......
......@@ -24,7 +24,7 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
return XT_CONTINUE;
if (ct) {
atomic_inc(&ct->ct_general.use);
refcount_inc(&ct->ct_general.use);
nf_ct_set(skb, ct, IP_CT_NEW);
} else {
nf_ct_set(skb, ct, IP_CT_UNTRACKED);
......@@ -201,7 +201,6 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
goto err4;
}
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
nf_conntrack_get(&ct->ct_general);
out:
info->ct = ct;
return 0;
......
......@@ -576,7 +576,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
nf_ct_delete(ct, 0, 0);
nf_conntrack_put(&ct->ct_general);
nf_ct_put(ct);
}
}
......@@ -725,7 +725,7 @@ static bool skb_nfct_cached(struct net *net,
if (nf_ct_is_confirmed(ct))
nf_ct_delete(ct, 0, 0);
nf_conntrack_put(&ct->ct_general);
nf_ct_put(ct);
nf_ct_set(skb, NULL, 0);
return false;
}
......@@ -969,7 +969,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
/* Associate skb with specified zone. */
if (tmpl) {
nf_conntrack_put(skb_nfct(skb));
ct = nf_ct_get(skb, &ctinfo);
nf_ct_put(ct);
nf_conntrack_get(&tmpl->ct_general);
nf_ct_set(skb, tmpl, IP_CT_NEW);
}
......@@ -1334,7 +1335,12 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
{
nf_conntrack_put(skb_nfct(skb));
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
nf_ct_put(ct);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
ovs_ct_fill_key(skb, key, false);
......@@ -1722,7 +1728,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
goto err_free_ct;
__set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
nf_conntrack_get(&ct_info.ct->ct_general);
return 0;
err_free_ct:
__ovs_ct_free_action(&ct_info);
......
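nf_conntrack_put() takes the embedded struct nf_conntrack and historically released it through an indirect call, while every caller in these hunks already holds the struct nf_conn itself. nf_ct_put() drops the reference directly via the refcount API; after this series its shape is approximately (see "netfilter: conntrack: avoid useless indirection during conntrack destruction"):

static inline void nf_ct_put(struct nf_conn *ct)
{
	if (ct && refcount_dec_and_test(&ct->ct_general.use))
		nf_ct_destroy(&ct->ct_general);
}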
......@@ -614,7 +614,7 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
if (nf_ct_is_confirmed(ct))
nf_ct_kill(ct);
nf_conntrack_put(&ct->ct_general);
nf_ct_put(ct);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
return false;
......@@ -779,7 +779,7 @@ static void tcf_ct_params_free(struct rcu_head *head)
tcf_ct_flow_table_put(params);
if (params->tmpl)
nf_conntrack_put(&params->tmpl->ct_general);
nf_ct_put(params->tmpl);
kfree(params);
}
......@@ -983,7 +983,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
tc_skb_cb(skb)->post_ct = false;
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
nf_conntrack_put(&ct->ct_general);
nf_ct_put(ct);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
}
......@@ -1249,7 +1249,6 @@ static int tcf_ct_fill_params(struct net *net,
return -ENOMEM;
}
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
nf_conntrack_get(&tmpl->ct_general);
p->tmpl = tmpl;
return 0;
......
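Both the xt_CT and act_ct hunks (like the ovs one above) delete the nf_conntrack_get() that used to follow template creation: after the refcount_t conversion, the template allocator already returns the object with the caller's reference accounted for, so the extra get would leak the template. The resulting setup, sketched with error handling trimmed (net, zone and p->tmpl stand in for the surrounding context):

struct nf_conn *tmpl;

tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL);
if (!tmpl)
	return -ENOMEM;

__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
/* no nf_conntrack_get() here: the allocation already holds the one
 * reference this code owns */
p->tmpl = tmpl;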
......@@ -880,8 +880,9 @@ EOF
return $ksft_skip
fi
# test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
test_port_shadow "default" "CLIENT"
# test default behaviour. Packet from ns1 to ns0 is not redirected
# due to automatic port translation.
test_port_shadow "default" "ROUTER"
# test packet filter based mitigation: prevent forwarding of
# packets claiming to come from the service port.
......