Commit cc4e5eec authored by Jakub Kicinski

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Restrict range element expansion in ipset to avoid soft lockup,
   from Jozsef Kadlecsik.

2) Memleak in error path for nf_conntrack_bridge for IPv4 packets,
   from Yajun Deng.

3) Simplify conntrack garbage collection strategy to avoid frequent
   wake-ups, from Florian Westphal.

4) Fix NFNLA_HOOK_FUNCTION_NAME string, do not include module name.

5) Missing chain family netlink attribute in chain description
   in nfnetlink_hook.

6) Incorrect sequence number on nfnetlink_hook dumps.

7) Use netlink request family in reply message for consistency.

8) Remove offload_pickup sysctl, use conntrack for established state
   instead, from Florian Westphal.

9) Translate NFPROTO_INET/ingress to NFPROTO_NETDEV/ingress, since
   NFPROTO_INET is not exposed through nfnetlink_hook.

* git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf:
  netfilter: nfnetlink_hook: translate inet ingress to netdev
  netfilter: conntrack: remove offload_pickup sysctl again
  netfilter: nfnetlink_hook: Use same family as request message
  netfilter: nfnetlink_hook: use the sequence number of the request message
  netfilter: nfnetlink_hook: missing chain family
  netfilter: nfnetlink_hook: strip off module name from hookfn
  netfilter: conntrack: collect all entries in one cycle
  netfilter: nf_conntrack_bridge: Fix memory leak when error
  netfilter: ipset: Limit the maximal range of consecutive elements to add/delete
====================

Link: https://lore.kernel.org/r/20210806151149.6356-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 704e624f 269fc695
...@@ -191,19 +191,9 @@ nf_flowtable_tcp_timeout - INTEGER (seconds) ...@@ -191,19 +191,9 @@ nf_flowtable_tcp_timeout - INTEGER (seconds)
TCP connections may be offloaded from nf conntrack to nf flow table. TCP connections may be offloaded from nf conntrack to nf flow table.
Once aged, the connection is returned to nf conntrack with tcp pickup timeout. Once aged, the connection is returned to nf conntrack with tcp pickup timeout.
nf_flowtable_tcp_pickup - INTEGER (seconds)
default 120
TCP connection timeout after being aged from nf flow table offload.
nf_flowtable_udp_timeout - INTEGER (seconds) nf_flowtable_udp_timeout - INTEGER (seconds)
default 30 default 30
Control offload timeout for udp connections. Control offload timeout for udp connections.
UDP connections may be offloaded from nf conntrack to nf flow table. UDP connections may be offloaded from nf conntrack to nf flow table.
Once aged, the connection is returned to nf conntrack with udp pickup timeout. Once aged, the connection is returned to nf conntrack with udp pickup timeout.
nf_flowtable_udp_pickup - INTEGER (seconds)
default 30
UDP connection timeout after being aged from nf flow table offload.
...@@ -196,6 +196,9 @@ struct ip_set_region { ...@@ -196,6 +196,9 @@ struct ip_set_region {
u32 elements; /* Number of elements vs timeout */ u32 elements; /* Number of elements vs timeout */
}; };
/* Max range where every element is added/deleted in one step */
#define IPSET_MAX_RANGE (1<<20)
/* The max revision number supported by any set type + 1 */ /* The max revision number supported by any set type + 1 */
#define IPSET_REVISION_MAX 9 #define IPSET_REVISION_MAX 9
......
...@@ -30,7 +30,6 @@ struct nf_tcp_net { ...@@ -30,7 +30,6 @@ struct nf_tcp_net {
u8 tcp_ignore_invalid_rst; u8 tcp_ignore_invalid_rst;
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
unsigned int offload_timeout; unsigned int offload_timeout;
unsigned int offload_pickup;
#endif #endif
}; };
...@@ -44,7 +43,6 @@ struct nf_udp_net { ...@@ -44,7 +43,6 @@ struct nf_udp_net {
unsigned int timeouts[UDP_CT_MAX]; unsigned int timeouts[UDP_CT_MAX];
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
unsigned int offload_timeout; unsigned int offload_timeout;
unsigned int offload_pickup;
#endif #endif
}; };
......
...@@ -43,6 +43,15 @@ enum nfnl_hook_chain_info_attributes { ...@@ -43,6 +43,15 @@ enum nfnl_hook_chain_info_attributes {
}; };
#define NFNLA_HOOK_INFO_MAX (__NFNLA_HOOK_INFO_MAX - 1) #define NFNLA_HOOK_INFO_MAX (__NFNLA_HOOK_INFO_MAX - 1)
/*
 * Netlink attribute types used inside the nested chain description of a
 * hook dump. The numeric values follow declaration order, so entries must
 * not be reordered.
 */
enum nfnl_hook_chain_desc_attributes {
/* Value 0; never emitted by the dump code shown for this enum. */
NFNLA_CHAIN_UNSPEC,
/* String: name of the table the chain belongs to. */
NFNLA_CHAIN_TABLE,
/* u8: family of the table the chain belongs to. */
NFNLA_CHAIN_FAMILY,
/* String: name of the chain. */
NFNLA_CHAIN_NAME,
/* Sentinel: one past the last attribute above; not an attribute itself. */
__NFNLA_CHAIN_MAX,
};
/* Highest attribute type defined by the enum above. */
#define NFNLA_CHAIN_MAX (__NFNLA_CHAIN_MAX - 1)
/** /**
* enum nfnl_hook_chaintype - chain type * enum nfnl_hook_chaintype - chain type
* *
......
...@@ -88,6 +88,12 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, ...@@ -88,6 +88,12 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
skb = ip_fraglist_next(&iter); skb = ip_fraglist_next(&iter);
} }
if (!err)
return 0;
kfree_skb_list(iter.frag);
return err; return err;
} }
slow_path: slow_path:
......
...@@ -132,8 +132,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -132,8 +132,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret) if (ret)
return ret; return ret;
if (ip > ip_to) if (ip > ip_to) {
if (ip_to == 0)
return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to); swap(ip, ip_to);
}
} else if (tb[IPSET_ATTR_CIDR]) { } else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
...@@ -144,6 +147,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -144,6 +147,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
/* 64bit division is not allowed on 32bit */
if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) { if (retried) {
ip = ntohl(h->next.ip); ip = ntohl(h->next.ip);
e.ip = htonl(ip); e.ip = htonl(ip);
......
...@@ -121,6 +121,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -121,6 +121,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK])); e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
e.mark &= h->markmask; e.mark &= h->markmask;
if (e.mark == 0 && e.ip == 0)
return -IPSET_ERR_HASH_ELEM;
if (adt == IPSET_TEST || if (adt == IPSET_TEST ||
!(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) { !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
...@@ -133,8 +135,11 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -133,8 +135,11 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret) if (ret)
return ret; return ret;
if (ip > ip_to) if (ip > ip_to) {
if (e.mark == 0 && ip_to == 0)
return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to); swap(ip, ip_to);
}
} else if (tb[IPSET_ATTR_CIDR]) { } else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
...@@ -143,6 +148,9 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -143,6 +148,9 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr); ip_set_mask_from_to(ip, ip_to, cidr);
} }
if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) if (retried)
ip = ntohl(h->next.ip); ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) { for (; ip <= ip_to; ip++) {
......
...@@ -173,6 +173,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -173,6 +173,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to); swap(port, port_to);
} }
if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) if (retried)
ip = ntohl(h->next.ip); ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) { for (; ip <= ip_to; ip++) {
......
...@@ -180,6 +180,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -180,6 +180,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to); swap(port, port_to);
} }
if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) if (retried)
ip = ntohl(h->next.ip); ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) { for (; ip <= ip_to; ip++) {
......
...@@ -253,6 +253,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -253,6 +253,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to); swap(port, port_to);
} }
if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
return -ERANGE;
ip2_to = ip2_from; ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) { if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
......
...@@ -140,7 +140,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -140,7 +140,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt]; ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK }; struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set); struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0; u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret; int ret;
if (tb[IPSET_ATTR_LINENO]) if (tb[IPSET_ATTR_LINENO])
...@@ -188,6 +188,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -188,6 +188,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip + UINT_MAX == ip_to) if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE; return -IPSET_ERR_HASH_RANGE;
} }
ipn = ip;
do {
ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
n++;
} while (ipn++ < ip_to);
if (n > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) if (retried)
ip = ntohl(h->next.ip); ip = ntohl(h->next.ip);
do { do {
......
...@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt]; ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set); struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0; u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret; int ret;
if (tb[IPSET_ATTR_LINENO]) if (tb[IPSET_ATTR_LINENO])
...@@ -256,6 +256,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -256,6 +256,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
} else { } else {
ip_set_mask_from_to(ip, ip_to, e.cidr); ip_set_mask_from_to(ip, ip_to, e.cidr);
} }
ipn = ip;
do {
ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
n++;
} while (ipn++ < ip_to);
if (n > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) if (retried)
ip = ntohl(h->next.ip); ip = ntohl(h->next.ip);
......
...@@ -168,7 +168,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -168,7 +168,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netnet4_elem e = { }; struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set); struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0; u32 ip = 0, ip_to = 0;
u32 ip2 = 0, ip2_from = 0, ip2_to = 0; u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
u64 n = 0, m = 0;
int ret; int ret;
if (tb[IPSET_ATTR_LINENO]) if (tb[IPSET_ATTR_LINENO])
...@@ -244,6 +245,19 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -244,6 +245,19 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else { } else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
} }
ipn = ip;
do {
ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
n++;
} while (ipn++ < ip_to);
ipn = ip2_from;
do {
ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
m++;
} while (ipn++ < ip2_to);
if (n*m > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) { if (retried) {
ip = ntohl(h->next.ip[0]); ip = ntohl(h->next.ip[0]);
......
...@@ -158,7 +158,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -158,7 +158,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt]; ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set); struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to, p = 0, ip = 0, ip_to = 0; u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
u64 n = 0;
bool with_ports = false; bool with_ports = false;
u8 cidr; u8 cidr;
int ret; int ret;
...@@ -235,6 +236,14 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -235,6 +236,14 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else { } else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1); ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
} }
ipn = ip;
do {
ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
n++;
} while (ipn++ < ip_to);
if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) { if (retried) {
ip = ntohl(h->next.ip); ip = ntohl(h->next.ip);
......
...@@ -182,7 +182,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -182,7 +182,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netportnet4_elem e = { }; struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set); struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to; u32 ip = 0, ip_to = 0, p = 0, port, port_to;
u32 ip2_from = 0, ip2_to = 0, ip2; u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
u64 n = 0, m = 0;
bool with_ports = false; bool with_ports = false;
int ret; int ret;
...@@ -284,6 +285,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ...@@ -284,6 +285,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else { } else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
} }
ipn = ip;
do {
ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
n++;
} while (ipn++ < ip_to);
ipn = ip2_from;
do {
ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
m++;
} while (ipn++ < ip2_to);
if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) { if (retried) {
ip = ntohl(h->next.ip[0]); ip = ntohl(h->next.ip[0]);
......
...@@ -66,22 +66,17 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); ...@@ -66,22 +66,17 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
struct conntrack_gc_work { struct conntrack_gc_work {
struct delayed_work dwork; struct delayed_work dwork;
u32 last_bucket; u32 next_bucket;
bool exiting; bool exiting;
bool early_drop; bool early_drop;
long next_gc_run;
}; };
static __read_mostly struct kmem_cache *nf_conntrack_cachep; static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all; static __read_mostly bool nf_conntrack_locks_all;
/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ #define GC_SCAN_INTERVAL (120u * HZ)
#define GC_MAX_BUCKETS_DIV 128u #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
/* upper bound of full table scan */
#define GC_MAX_SCAN_JIFFIES (16u * HZ)
/* desired ratio of entries found to be expired */
#define GC_EVICT_RATIO 50u
static struct conntrack_gc_work conntrack_gc_work; static struct conntrack_gc_work conntrack_gc_work;
...@@ -1363,17 +1358,13 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) ...@@ -1363,17 +1358,13 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
static void gc_worker(struct work_struct *work) static void gc_worker(struct work_struct *work)
{ {
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
unsigned int i, goal, buckets = 0, expired_count = 0; unsigned int i, hashsz, nf_conntrack_max95 = 0;
unsigned int nf_conntrack_max95 = 0; unsigned long next_run = GC_SCAN_INTERVAL;
struct conntrack_gc_work *gc_work; struct conntrack_gc_work *gc_work;
unsigned int ratio, scanned = 0;
unsigned long next_run;
gc_work = container_of(work, struct conntrack_gc_work, dwork.work); gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; i = gc_work->next_bucket;
i = gc_work->last_bucket;
if (gc_work->early_drop) if (gc_work->early_drop)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
...@@ -1381,15 +1372,15 @@ static void gc_worker(struct work_struct *work) ...@@ -1381,15 +1372,15 @@ static void gc_worker(struct work_struct *work)
struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash; struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n; struct hlist_nulls_node *n;
unsigned int hashsz;
struct nf_conn *tmp; struct nf_conn *tmp;
i++;
rcu_read_lock(); rcu_read_lock();
nf_conntrack_get_ht(&ct_hash, &hashsz); nf_conntrack_get_ht(&ct_hash, &hashsz);
if (i >= hashsz) if (i >= hashsz) {
i = 0; rcu_read_unlock();
break;
}
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct nf_conntrack_net *cnet; struct nf_conntrack_net *cnet;
...@@ -1397,7 +1388,6 @@ static void gc_worker(struct work_struct *work) ...@@ -1397,7 +1388,6 @@ static void gc_worker(struct work_struct *work)
tmp = nf_ct_tuplehash_to_ctrack(h); tmp = nf_ct_tuplehash_to_ctrack(h);
scanned++;
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
nf_ct_offload_timeout(tmp); nf_ct_offload_timeout(tmp);
continue; continue;
...@@ -1405,7 +1395,6 @@ static void gc_worker(struct work_struct *work) ...@@ -1405,7 +1395,6 @@ static void gc_worker(struct work_struct *work)
if (nf_ct_is_expired(tmp)) { if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp); nf_ct_gc_expired(tmp);
expired_count++;
continue; continue;
} }
...@@ -1438,7 +1427,14 @@ static void gc_worker(struct work_struct *work) ...@@ -1438,7 +1427,14 @@ static void gc_worker(struct work_struct *work)
*/ */
rcu_read_unlock(); rcu_read_unlock();
cond_resched(); cond_resched();
} while (++buckets < goal); i++;
if (time_after(jiffies, end_time) && i < hashsz) {
gc_work->next_bucket = i;
next_run = 0;
break;
}
} while (i < hashsz);
if (gc_work->exiting) if (gc_work->exiting)
return; return;
...@@ -1449,40 +1445,17 @@ static void gc_worker(struct work_struct *work) ...@@ -1449,40 +1445,17 @@ static void gc_worker(struct work_struct *work)
* *
* This worker is only here to reap expired entries when system went * This worker is only here to reap expired entries when system went
* idle after a busy period. * idle after a busy period.
*
* The heuristics below are supposed to balance conflicting goals:
*
* 1. Minimize time until we notice a stale entry
* 2. Maximize scan intervals to not waste cycles
*
* Normally, expire ratio will be close to 0.
*
* As soon as a sizeable fraction of the entries have expired
* increase scan frequency.
*/ */
ratio = scanned ? expired_count * 100 / scanned : 0; if (next_run) {
if (ratio > GC_EVICT_RATIO) {
gc_work->next_gc_run = min_interval;
} else {
unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
gc_work->next_gc_run += min_interval;
if (gc_work->next_gc_run > max)
gc_work->next_gc_run = max;
}
next_run = gc_work->next_gc_run;
gc_work->last_bucket = i;
gc_work->early_drop = false; gc_work->early_drop = false;
gc_work->next_bucket = 0;
}
queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
} }
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{ {
INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
gc_work->next_gc_run = HZ;
gc_work->exiting = false; gc_work->exiting = false;
} }
......
...@@ -1478,7 +1478,6 @@ void nf_conntrack_tcp_init_net(struct net *net) ...@@ -1478,7 +1478,6 @@ void nf_conntrack_tcp_init_net(struct net *net)
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
tn->offload_timeout = 30 * HZ; tn->offload_timeout = 30 * HZ;
tn->offload_pickup = 120 * HZ;
#endif #endif
} }
......
...@@ -271,7 +271,6 @@ void nf_conntrack_udp_init_net(struct net *net) ...@@ -271,7 +271,6 @@ void nf_conntrack_udp_init_net(struct net *net)
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
un->offload_timeout = 30 * HZ; un->offload_timeout = 30 * HZ;
un->offload_pickup = 30 * HZ;
#endif #endif
} }
......
...@@ -575,7 +575,6 @@ enum nf_ct_sysctl_index { ...@@ -575,7 +575,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
#endif #endif
NF_SYSCTL_CT_PROTO_TCP_LOOSE, NF_SYSCTL_CT_PROTO_TCP_LOOSE,
NF_SYSCTL_CT_PROTO_TCP_LIBERAL, NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
...@@ -585,7 +584,6 @@ enum nf_ct_sysctl_index { ...@@ -585,7 +584,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
#endif #endif
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP, NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6, NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
...@@ -776,12 +774,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { ...@@ -776,12 +774,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec_jiffies, .proc_handler = proc_dointvec_jiffies,
}, },
[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
.procname = "nf_flowtable_tcp_pickup",
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
#endif #endif
[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = { [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
.procname = "nf_conntrack_tcp_loose", .procname = "nf_conntrack_tcp_loose",
...@@ -832,12 +824,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { ...@@ -832,12 +824,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec_jiffies, .proc_handler = proc_dointvec_jiffies,
}, },
[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
.procname = "nf_flowtable_udp_pickup",
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
#endif #endif
[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = { [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
.procname = "nf_conntrack_icmp_timeout", .procname = "nf_conntrack_icmp_timeout",
...@@ -1018,7 +1004,6 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, ...@@ -1018,7 +1004,6 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout; table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
#endif #endif
} }
...@@ -1111,7 +1096,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) ...@@ -1111,7 +1096,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED]; table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout; table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
#endif #endif
nf_conntrack_standalone_init_tcp_sysctl(net, table); nf_conntrack_standalone_init_tcp_sysctl(net, table);
......
...@@ -183,7 +183,7 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) ...@@ -183,7 +183,7 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
const struct nf_conntrack_l4proto *l4proto; const struct nf_conntrack_l4proto *l4proto;
struct net *net = nf_ct_net(ct); struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct); int l4num = nf_ct_protonum(ct);
unsigned int timeout; s32 timeout;
l4proto = nf_ct_l4proto_find(l4num); l4proto = nf_ct_l4proto_find(l4num);
if (!l4proto) if (!l4proto)
...@@ -192,15 +192,20 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) ...@@ -192,15 +192,20 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
if (l4num == IPPROTO_TCP) { if (l4num == IPPROTO_TCP) {
struct nf_tcp_net *tn = nf_tcp_pernet(net); struct nf_tcp_net *tn = nf_tcp_pernet(net);
timeout = tn->offload_pickup; timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) { } else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net); struct nf_udp_net *tn = nf_udp_pernet(net);
timeout = tn->offload_pickup; timeout = tn->timeouts[UDP_CT_REPLIED];
timeout -= tn->offload_timeout;
} else { } else {
return; return;
} }
if (timeout < 0)
timeout = 0;
if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
ct->timeout = nfct_time_stamp + timeout; ct->timeout = nfct_time_stamp + timeout;
} }
......
...@@ -89,11 +89,15 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb, ...@@ -89,11 +89,15 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
if (!nest2) if (!nest2)
goto cancel_nest; goto cancel_nest;
ret = nla_put_string(nlskb, NFTA_CHAIN_TABLE, chain->table->name); ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name);
if (ret) if (ret)
goto cancel_nest; goto cancel_nest;
ret = nla_put_string(nlskb, NFTA_CHAIN_NAME, chain->name); ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name);
if (ret)
goto cancel_nest;
ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family);
if (ret) if (ret)
goto cancel_nest; goto cancel_nest;
...@@ -109,18 +113,19 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb, ...@@ -109,18 +113,19 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
static int nfnl_hook_dump_one(struct sk_buff *nlskb, static int nfnl_hook_dump_one(struct sk_buff *nlskb,
const struct nfnl_dump_hook_data *ctx, const struct nfnl_dump_hook_data *ctx,
const struct nf_hook_ops *ops, const struct nf_hook_ops *ops,
unsigned int seq) int family, unsigned int seq)
{ {
u16 event = nfnl_msg_type(NFNL_SUBSYS_HOOK, NFNL_MSG_HOOK_GET); u16 event = nfnl_msg_type(NFNL_SUBSYS_HOOK, NFNL_MSG_HOOK_GET);
unsigned int portid = NETLINK_CB(nlskb).portid; unsigned int portid = NETLINK_CB(nlskb).portid;
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
int ret = -EMSGSIZE; int ret = -EMSGSIZE;
u32 hooknum;
#ifdef CONFIG_KALLSYMS #ifdef CONFIG_KALLSYMS
char sym[KSYM_SYMBOL_LEN]; char sym[KSYM_SYMBOL_LEN];
char *module_name; char *module_name;
#endif #endif
nlh = nfnl_msg_put(nlskb, portid, seq, event, nlh = nfnl_msg_put(nlskb, portid, seq, event,
NLM_F_MULTI, ops->pf, NFNETLINK_V0, 0); NLM_F_MULTI, family, NFNETLINK_V0, 0);
if (!nlh) if (!nlh)
goto nla_put_failure; goto nla_put_failure;
...@@ -135,6 +140,7 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb, ...@@ -135,6 +140,7 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
if (module_name) { if (module_name) {
char *end; char *end;
*module_name = '\0';
module_name += 2; module_name += 2;
end = strchr(module_name, ']'); end = strchr(module_name, ']');
if (end) { if (end) {
...@@ -151,7 +157,12 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb, ...@@ -151,7 +157,12 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
goto nla_put_failure; goto nla_put_failure;
#endif #endif
ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(ops->hooknum)); if (ops->pf == NFPROTO_INET && ops->hooknum == NF_INET_INGRESS)
hooknum = NF_NETDEV_INGRESS;
else
hooknum = ops->hooknum;
ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(hooknum));
if (ret) if (ret)
goto nla_put_failure; goto nla_put_failure;
...@@ -259,7 +270,8 @@ static int nfnl_hook_dump(struct sk_buff *nlskb, ...@@ -259,7 +270,8 @@ static int nfnl_hook_dump(struct sk_buff *nlskb,
ops = nf_hook_entries_get_hook_ops(e); ops = nf_hook_entries_get_hook_ops(e);
for (; i < e->num_hook_entries; i++) { for (; i < e->num_hook_entries; i++) {
err = nfnl_hook_dump_one(nlskb, ctx, ops[i], cb->seq); err = nfnl_hook_dump_one(nlskb, ctx, ops[i], family,
cb->nlh->nlmsg_seq);
if (err) if (err)
break; break;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment