Commit 855404ef authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
netfilter/IPVS updates for net-next

The following patchset contains Netfilter updates for your net-next tree,
they are:

* Add full port randomization support. Some crazy researchers found a way
  to reconstruct the secure ephemeral ports that are allocated in random mode
  by sending off-path bursts of UDP packets to overrun the socket buffer of
  the DNS resolver to trigger retransmissions, then if the timing for the
  DNS resolution done by a client is larger than usual, then they conclude
  that the port that received the burst of UDP packets is the one that was
  opened. It seems a bit aggressive method to me but it seems to work for
  them. As a result, Daniel Borkmann and Hannes Frederic Sowa came up with a
  new NAT mode to fully randomize ports using prandom.

* Add a new classifier to x_tables based on the socket net_cls set via
  cgroups. These includes two patches to prepare the field as requested by
  Zefan Li. Also from Daniel Borkmann.

* Use prandom instead of get_random_bytes in several locations of the
  netfilter code, from Florian Westphal.

* Allow to use the CTA_MARK_MASK in ctnetlink when mangling the conntrack
  mark, also from Florian Westphal.

* Fix compilation warning due to unused variable in IPVS, from Geert
  Uytterhoeven.

* Add support for UID/GID via nfnetlink_queue, from Valentina Giusti.

* Add IPComp extension to x_tables, from Fan Du.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents a1d4b03a 82a37132
......@@ -6,6 +6,8 @@ tag network packets with a class identifier (classid).
The Traffic Controller (tc) can be used to assign
different priorities to packets from different cgroups.
Also, Netfilter (iptables) can use this tag to perform
actions on such packets.
Creating a net_cls cgroups instance creates a net_cls.classid file.
This net_cls.classid value is initialized to 0.
......@@ -32,3 +34,6 @@ tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
- creating traffic class 10:1
tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
configuring iptables, basic example:
iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP
......@@ -31,7 +31,7 @@ SUBSYS(devices)
SUBSYS(freezer)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP)
#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_NET_CLASSID)
SUBSYS(net_cls)
#endif
......@@ -43,7 +43,7 @@ SUBSYS(blkio)
SUBSYS(perf)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP)
#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_NET_PRIO)
SUBSYS(net_prio)
#endif
......
......@@ -1444,7 +1444,7 @@ struct net_device {
/* max exchange id for FCoE LRO by ddp */
unsigned int fcoe_ddp_xid;
#endif
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
struct netprio_map __rcu *priomap;
#endif
/* phy device may attach itself for hardware timestamping */
......
......@@ -331,7 +331,6 @@ extern ip_set_id_t ip_set_get_byname(struct net *net,
const char *name, struct ip_set **set);
extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
extern ip_set_id_t ip_set_nfnl_get(struct net *net, const char *name);
extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
......
......@@ -16,17 +16,16 @@
#include <linux/cgroup.h>
#include <linux/hardirq.h>
#include <linux/rcupdate.h>
#include <net/sock.h>
#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
struct cgroup_cls_state
{
#ifdef CONFIG_CGROUP_NET_CLASSID
struct cgroup_cls_state {
struct cgroup_subsys_state css;
u32 classid;
};
void sock_update_classid(struct sock *sk);
struct cgroup_cls_state *task_cls_state(struct task_struct *p);
#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
static inline u32 task_cls_classid(struct task_struct *p)
{
u32 classid;
......@@ -41,33 +40,18 @@ static inline u32 task_cls_classid(struct task_struct *p)
return classid;
}
#elif IS_MODULE(CONFIG_NET_CLS_CGROUP)
static inline u32 task_cls_classid(struct task_struct *p)
{
struct cgroup_subsys_state *css;
u32 classid = 0;
if (in_interrupt())
return 0;
rcu_read_lock();
css = task_css(p, net_cls_subsys_id);
if (css)
classid = container_of(css,
struct cgroup_cls_state, css)->classid;
rcu_read_unlock();
return classid;
}
#endif
#else /* !CGROUP_NET_CLS_CGROUP */
static inline void sock_update_classid(struct sock *sk)
{
}
u32 classid;
static inline u32 task_cls_classid(struct task_struct *p)
classid = task_cls_classid(current);
if (classid != sk->sk_classid)
sk->sk_classid = classid;
}
#else /* !CONFIG_CGROUP_NET_CLASSID */
static inline void sock_update_classid(struct sock *sk)
{
return 0;
}
#endif /* CGROUP_NET_CLS_CGROUP */
#endif /* CONFIG_CGROUP_NET_CLASSID */
#endif /* _NET_CLS_CGROUP_H */
......@@ -19,6 +19,4 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
int nf_conntrack_ipv4_compat_init(void);
void nf_conntrack_ipv4_compat_fini(void);
void need_ipv4_conntrack(void);
#endif /*_NF_CONNTRACK_IPV4_H*/
......@@ -87,7 +87,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto);
void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto);
struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p);
/* Existing built-in protocols */
extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
......
......@@ -65,6 +65,23 @@ struct nf_ip_net {
struct netns_ct {
atomic_t count;
unsigned int expect_count;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
struct ctl_table_header *tstamp_sysctl_header;
struct ctl_table_header *event_sysctl_header;
struct ctl_table_header *helper_sysctl_header;
#endif
char *slabname;
unsigned int sysctl_log_invalid; /* Log invalid packets */
unsigned int sysctl_events_retry_timeout;
int sysctl_events;
int sysctl_acct;
int sysctl_auto_assign_helper;
bool auto_assign_helper_warned;
int sysctl_tstamp;
int sysctl_checksum;
unsigned int htable_size;
struct kmem_cache *nf_conntrack_cachep;
struct hlist_nulls_head *hash;
......@@ -75,14 +92,6 @@ struct netns_ct {
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
int sysctl_events;
unsigned int sysctl_events_retry_timeout;
int sysctl_acct;
int sysctl_tstamp;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
int sysctl_auto_assign_helper;
bool auto_assign_helper_warned;
struct nf_ip_net nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
unsigned int labels_used;
......@@ -92,13 +101,5 @@ struct netns_ct {
struct hlist_head *nat_bysource;
unsigned int nat_htable_size;
#endif
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
struct ctl_table_header *tstamp_sysctl_header;
struct ctl_table_header *event_sysctl_header;
struct ctl_table_header *helper_sysctl_header;
#endif
char *slabname;
};
#endif
......@@ -13,12 +13,12 @@
#ifndef _NETPRIO_CGROUP_H
#define _NETPRIO_CGROUP_H
#include <linux/cgroup.h>
#include <linux/hardirq.h>
#include <linux/rcupdate.h>
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
struct netprio_map {
struct rcu_head rcu;
u32 priomap_len;
......@@ -27,8 +27,7 @@ struct netprio_map {
void sock_update_netprioidx(struct sock *sk);
#if IS_BUILTIN(CONFIG_NETPRIO_CGROUP)
#if IS_BUILTIN(CONFIG_CGROUP_NET_PRIO)
static inline u32 task_netprioidx(struct task_struct *p)
{
struct cgroup_subsys_state *css;
......@@ -40,9 +39,7 @@ static inline u32 task_netprioidx(struct task_struct *p)
rcu_read_unlock();
return idx;
}
#elif IS_MODULE(CONFIG_NETPRIO_CGROUP)
#elif IS_MODULE(CONFIG_CGROUP_NET_PRIO)
static inline u32 task_netprioidx(struct task_struct *p)
{
struct cgroup_subsys_state *css;
......@@ -56,9 +53,7 @@ static inline u32 task_netprioidx(struct task_struct *p)
return idx;
}
#endif
#else /* !CONFIG_NETPRIO_CGROUP */
#else /* !CONFIG_CGROUP_NET_PRIO */
static inline u32 task_netprioidx(struct task_struct *p)
{
return 0;
......@@ -66,6 +61,5 @@ static inline u32 task_netprioidx(struct task_struct *p)
#define sock_update_netprioidx(sk)
#endif /* CONFIG_NETPRIO_CGROUP */
#endif /* CONFIG_CGROUP_NET_PRIO */
#endif /* _NET_CLS_CGROUP_H */
......@@ -395,7 +395,7 @@ struct sock {
unsigned short sk_ack_backlog;
unsigned short sk_max_ack_backlog;
__u32 sk_priority;
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
__u32 sk_cgrp_prioidx;
#endif
struct pid *sk_peer_pid;
......
......@@ -39,6 +39,7 @@ header-y += xt_TEE.h
header-y += xt_TPROXY.h
header-y += xt_addrtype.h
header-y += xt_bpf.h
header-y += xt_cgroup.h
header-y += xt_cluster.h
header-y += xt_comment.h
header-y += xt_connbytes.h
......@@ -54,6 +55,7 @@ header-y += xt_ecn.h
header-y += xt_esp.h
header-y += xt_hashlimit.h
header-y += xt_helper.h
header-y += xt_ipcomp.h
header-y += xt_iprange.h
header-y += xt_ipvs.h
header-y += xt_length.h
......
......@@ -4,10 +4,14 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#define NF_NAT_RANGE_MAP_IPS 1
#define NF_NAT_RANGE_PROTO_SPECIFIED 2
#define NF_NAT_RANGE_PROTO_RANDOM 4
#define NF_NAT_RANGE_PERSISTENT 8
#define NF_NAT_RANGE_MAP_IPS (1 << 0)
#define NF_NAT_RANGE_PROTO_SPECIFIED (1 << 1)
#define NF_NAT_RANGE_PROTO_RANDOM (1 << 2)
#define NF_NAT_RANGE_PERSISTENT (1 << 3)
#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4)
#define NF_NAT_RANGE_PROTO_RANDOM_ALL \
(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
struct nf_nat_ipv4_range {
unsigned int flags;
......
......@@ -47,6 +47,8 @@ enum nfqnl_attr_type {
NFQA_CAP_LEN, /* __u32 length of captured packet */
NFQA_SKB_INFO, /* __u32 skb meta information */
NFQA_EXP, /* nf_conntrack_netlink.h */
NFQA_UID, /* __u32 sk uid */
NFQA_GID, /* __u32 sk gid */
__NFQA_MAX
};
......@@ -99,7 +101,8 @@ enum nfqnl_attr_config {
#define NFQA_CFG_F_FAIL_OPEN (1 << 0)
#define NFQA_CFG_F_CONNTRACK (1 << 1)
#define NFQA_CFG_F_GSO (1 << 2)
#define NFQA_CFG_F_MAX (1 << 3)
#define NFQA_CFG_F_UID_GID (1 << 3)
#define NFQA_CFG_F_MAX (1 << 4)
/* flags for NFQA_SKB_INFO */
/* packet appears to have wrong checksums, but they are ok */
......
#ifndef _UAPI_XT_CGROUP_H
#define _UAPI_XT_CGROUP_H
#include <linux/types.h>
struct xt_cgroup_info {
__u32 id;
__u32 invert;
};
#endif /* _UAPI_XT_CGROUP_H */
#ifndef _XT_IPCOMP_H
#define _XT_IPCOMP_H
#include <linux/types.h>
struct xt_ipcomp {
__u32 spis[2]; /* Security Parameter Index */
__u8 invflags; /* Inverse flags */
__u8 hdrres; /* Test of the Reserved Filed */
};
/* Values for "invflags" field in struct xt_ipcomp. */
#define XT_IPCOMP_INV_SPI 0x01 /* Invert the sense of spi. */
#define XT_IPCOMP_INV_MASK 0x01 /* All possible flags. */
#endif /*_XT_IPCOMP_H*/
......@@ -238,12 +238,19 @@ config XPS
depends on SMP
default y
config NETPRIO_CGROUP
config CGROUP_NET_PRIO
tristate "Network priority cgroup"
depends on CGROUPS
---help---
Cgroup subsystem for use in assigning processes to network priorities on
a per-interface basis
a per-interface basis.
config CGROUP_NET_CLASSID
boolean "Network classid cgroup"
depends on CGROUPS
---help---
Cgroup subsystem for use as general purpose socket classid marker that is
being used in cls_cgroup and for netfilter matching.
config NET_RX_BUSY_POLL
boolean
......
......@@ -21,4 +21,5 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
obj-$(CONFIG_TRACEPOINTS) += net-traces.o
obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
......@@ -2741,7 +2741,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
return rc;
}
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
static void skb_update_prio(struct sk_buff *skb)
{
struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
......
/*
* net/core/netclassid_cgroup.c Classid Cgroupfs Handling
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Thomas Graf <tgraf@suug.ch>
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cgroup.h>
#include <linux/fdtable.h>
#include <net/cls_cgroup.h>
#include <net/sock.h>
static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
{
return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
}
struct cgroup_cls_state *task_cls_state(struct task_struct *p)
{
return css_cls_state(task_css(p, net_cls_subsys_id));
}
EXPORT_SYMBOL_GPL(task_cls_state);
static struct cgroup_subsys_state *
cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct cgroup_cls_state *cs;
cs = kzalloc(sizeof(*cs), GFP_KERNEL);
if (!cs)
return ERR_PTR(-ENOMEM);
return &cs->css;
}
static int cgrp_css_online(struct cgroup_subsys_state *css)
{
struct cgroup_cls_state *cs = css_cls_state(css);
struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
if (parent)
cs->classid = parent->classid;
return 0;
}
static void cgrp_css_free(struct cgroup_subsys_state *css)
{
kfree(css_cls_state(css));
}
static int update_classid(const void *v, struct file *file, unsigned n)
{
int err;
struct socket *sock = sock_from_file(file, &err);
if (sock)
sock->sk->sk_classid = (u32)(unsigned long)v;
return 0;
}
static void cgrp_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
struct cgroup_cls_state *cs = css_cls_state(css);
void *v = (void *)(unsigned long)cs->classid;
struct task_struct *p;
cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
iterate_fd(p->files, 0, update_classid, v);
task_unlock(p);
}
}
static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
{
return css_cls_state(css)->classid;
}
static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
u64 value)
{
css_cls_state(css)->classid = (u32) value;
return 0;
}
static struct cftype ss_files[] = {
{
.name = "classid",
.read_u64 = read_classid,
.write_u64 = write_classid,
},
{ } /* terminate */
};
struct cgroup_subsys net_cls_subsys = {
.name = "net_cls",
.css_alloc = cgrp_css_alloc,
.css_online = cgrp_css_online,
.css_free = cgrp_css_free,
.attach = cgrp_attach,
.subsys_id = net_cls_subsys_id,
.base_cftypes = ss_files,
.module = THIS_MODULE,
};
static int __init init_netclassid_cgroup(void)
{
return cgroup_load_subsys(&net_cls_subsys);
}
__initcall(init_netclassid_cgroup);
......@@ -1307,19 +1307,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
module_put(owner);
}
#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
void sock_update_classid(struct sock *sk)
{
u32 classid;
classid = task_cls_classid(current);
if (classid != sk->sk_classid)
sk->sk_classid = classid;
}
EXPORT_SYMBOL(sock_update_classid);
#endif
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
void sock_update_netprioidx(struct sock *sk)
{
if (in_interrupt())
......
......@@ -548,9 +548,3 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
module_init(nf_conntrack_l3proto_ipv4_init);
module_exit(nf_conntrack_l3proto_ipv4_fini);
void need_ipv4_conntrack(void)
{
return;
}
EXPORT_SYMBOL_GPL(need_ipv4_conntrack);
......@@ -858,6 +858,16 @@ config NETFILTER_XT_MATCH_BPF
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XT_MATCH_CGROUP
tristate '"control group" match support'
depends on NETFILTER_ADVANCED
depends on CGROUPS
select CGROUP_NET_CLASSID
---help---
Socket/process control group matching allows you to match locally
generated packets based on which net_cls control group processes
belong to.
config NETFILTER_XT_MATCH_CLUSTER
tristate '"cluster" match support'
depends on NF_CONNTRACK
......@@ -1035,6 +1045,15 @@ config NETFILTER_XT_MATCH_HL
in the IPv6 header, or the time-to-live field in the IPv4
header of the packet.
config NETFILTER_XT_MATCH_IPCOMP
tristate '"ipcomp" match support'
depends on NETFILTER_ADVANCED
help
This match extension allows you to match a range of CPIs(16 bits)
inside IPComp header of IPSec packets.
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XT_MATCH_IPRANGE
tristate '"iprange" address range match support'
depends on NETFILTER_ADVANCED
......
......@@ -133,6 +133,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
obj-$(CONFIG_NETFILTER_XT_MATCH_IPCOMP) += xt_ipcomp.o
obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o
obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
......@@ -142,6 +143,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
obj-$(CONFIG_NETFILTER_XT_MATCH_CGROUP) += xt_cgroup.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
......
......@@ -624,34 +624,6 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex);
* call nfnl_lock for us.
*/
/*
* Find set by name, reference it once. The reference makes sure the
* thing pointed to, does not go away under our feet.
*
* The nfnl mutex is used in the function.
*/
ip_set_id_t
ip_set_nfnl_get(struct net *net, const char *name)
{
ip_set_id_t i, index = IPSET_INVALID_ID;
struct ip_set *s;
struct ip_set_net *inst = ip_set_pernet(net);
nfnl_lock(NFNL_SUBSYS_IPSET);
for (i = 0; i < inst->ip_set_max; i++) {
s = nfnl_set(inst, i);
if (s != NULL && STREQ(s->name, name)) {
__ip_set_get(s);
index = i;
break;
}
}
nfnl_unlock(NFNL_SUBSYS_IPSET);
return index;
}
EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
/*
* Find set by index, reference it once. The reference makes sure the
* thing pointed to, does not go away under our feet.
......
......@@ -1637,7 +1637,10 @@ static int sync_thread_master(void *data)
continue;
}
while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
int ret = __wait_event_interruptible(*sk_sleep(sk),
/* (Ab)use interruptible sleep to avoid increasing
* the load avg.
*/
__wait_event_interruptible(*sk_sleep(sk),
sock_writeable(sk) ||
kthread_should_stop());
if (unlikely(kthread_should_stop()))
......
......@@ -60,12 +60,6 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
const struct nlattr *attr) __read_mostly;
EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff);
EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
DEFINE_SPINLOCK(nf_conntrack_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
......@@ -361,15 +355,6 @@ ____nf_conntrack_find(struct net *net, u16 zone,
return NULL;
}
struct nf_conntrack_tuple_hash *
__nf_conntrack_find(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple)
{
return ____nf_conntrack_find(net, zone, tuple,
hash_conntrack_raw(tuple, zone));
}
EXPORT_SYMBOL_GPL(__nf_conntrack_find);
/* Find a connection corresponding to a tuple. */
static struct nf_conntrack_tuple_hash *
__nf_conntrack_find_get(struct net *net, u16 zone,
......
......@@ -2118,8 +2118,16 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
return err;
}
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
if (cda[CTA_MARK]) {
u32 mask = 0, mark, newmark;
if (cda[CTA_MARK_MASK])
mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
mark = ntohl(nla_get_be32(cda[CTA_MARK]));
newmark = (ct->mark & mask) ^ mark;
if (newmark != ct->mark)
ct->mark = newmark;
}
#endif
return 0;
}
......
......@@ -92,12 +92,6 @@ nf_ct_l3proto_find_get(u_int16_t l3proto)
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
{
module_put(p->me);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_put);
int
nf_ct_l3proto_try_module_get(unsigned short l3proto)
{
......
......@@ -315,7 +315,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
* manips not an issue.
*/
if (maniptype == NF_NAT_MANIP_SRC &&
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
if (in_range(l3proto, l4proto, orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
......@@ -339,7 +339,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
*/
/* Only bother mapping if it's not already in range and unique */
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (l4proto->in_range(tuple, maniptype,
&range->min_proto,
......
......@@ -74,22 +74,24 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
range_size = ntohs(range->max_proto.all) - min + 1;
}
if (range->flags & NF_NAT_RANGE_PROTO_RANDOM)
if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) {
off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC
? tuple->dst.u.all
: tuple->src.u.all);
else
} else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
off = prandom_u32();
} else {
off = *rover;
}
for (i = 0; ; ++off) {
*portptr = htons(min + off % range_size);
if (++i != range_size && nf_nat_used_tuple(tuple, ct))
continue;
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM))
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL))
*rover = off;
return;
}
return;
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
......
......@@ -28,8 +28,6 @@
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/netfilter/nf_log.h>
......@@ -75,7 +73,6 @@ struct nfulnl_instance {
};
#define INSTANCE_BUCKETS 16
static unsigned int hash_init;
static int nfnl_log_net_id __read_mostly;
......@@ -1066,11 +1063,6 @@ static int __init nfnetlink_log_init(void)
{
int status = -ENOMEM;
/* it's not really all that important to have a random value, so
* we can do this from the init function, even if there hasn't
* been that much entropy yet */
get_random_bytes(&hash_init, sizeof(hash_init));
netlink_register_notifier(&nfulnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfulnl_subsys);
if (status < 0) {
......
......@@ -297,6 +297,31 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0;
}
static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
{
const struct cred *cred;
if (sk->sk_state == TCP_TIME_WAIT)
return 0;
read_lock_bh(&sk->sk_callback_lock);
if (sk->sk_socket && sk->sk_socket->file) {
cred = sk->sk_socket->file->f_cred;
if (nla_put_be32(skb, NFQA_UID,
htonl(from_kuid_munged(&init_user_ns, cred->fsuid))))
goto nla_put_failure;
if (nla_put_be32(skb, NFQA_GID,
htonl(from_kgid_munged(&init_user_ns, cred->fsgid))))
goto nla_put_failure;
}
read_unlock_bh(&sk->sk_callback_lock);
return 0;
nla_put_failure:
read_unlock_bh(&sk->sk_callback_lock);
return -1;
}
static struct sk_buff *
nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry,
......@@ -372,6 +397,11 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (queue->flags & NFQA_CFG_F_CONNTRACK)
ct = nfqnl_ct_get(entskb, &size, &ctinfo);
if (queue->flags & NFQA_CFG_F_UID_GID) {
size += (nla_total_size(sizeof(u_int32_t)) /* uid */
+ nla_total_size(sizeof(u_int32_t))); /* gid */
}
skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
GFP_ATOMIC);
if (!skb)
......@@ -484,6 +514,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
goto nla_put_failure;
}
if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk &&
nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
goto nla_put_failure;
if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
goto nla_put_failure;
......
......@@ -164,7 +164,7 @@ static int nft_hash_init(const struct nft_set *set,
unsigned int cnt, i;
if (unlikely(!nft_hash_rnd_initted)) {
get_random_bytes(&nft_hash_rnd, 4);
nft_hash_rnd = prandom_u32();
nft_hash_rnd_initted = true;
}
......
......@@ -211,8 +211,10 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
ret = 0;
if ((info->ct_events || info->exp_events) &&
!nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
GFP_KERNEL))
GFP_KERNEL)) {
ret = -EINVAL;
goto err3;
}
if (info->helper[0]) {
ret = xt_ct_set_helper(ct, info->helper, par);
......
......@@ -100,7 +100,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
int ret;
if (unlikely(!rnd_inited)) {
get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
jhash_rnd = prandom_u32();
rnd_inited = true;
}
......
/*
* Xtables module to match the process control group.
*
* Might be used to implement individual "per-application" firewall
* policies in contrast to global policies based on control groups.
* Matching is based upon processes tagged to net_cls' classid marker.
*
* (C) 2013 Daniel Borkmann <dborkman@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/skbuff.h>
#include <linux/module.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_cgroup.h>
#include <net/sock.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("Xtables: process control group matching");
MODULE_ALIAS("ipt_cgroup");
MODULE_ALIAS("ip6t_cgroup");
static int cgroup_mt_check(const struct xt_mtchk_param *par)
{
struct xt_cgroup_info *info = par->matchinfo;
if (info->invert & ~1)
return -EINVAL;
return info->id ? 0 : -EINVAL;
}
static bool
cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_cgroup_info *info = par->matchinfo;
if (skb->sk == NULL)
return false;
return (info->id == skb->sk->sk_classid) ^ info->invert;
}
static struct xt_match cgroup_mt_reg __read_mostly = {
.name = "cgroup",
.revision = 0,
.family = NFPROTO_UNSPEC,
.checkentry = cgroup_mt_check,
.match = cgroup_mt,
.matchsize = sizeof(struct xt_cgroup_info),
.me = THIS_MODULE,
.hooks = (1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING),
};
static int __init cgroup_mt_init(void)
{
return xt_register_match(&cgroup_mt_reg);
}
static void __exit cgroup_mt_exit(void)
{
xt_unregister_match(&cgroup_mt_reg);
}
module_init(cgroup_mt_init);
module_exit(cgroup_mt_exit);
......@@ -229,7 +229,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
u_int32_t rand;
do {
get_random_bytes(&rand, sizeof(rand));
rand = prandom_u32();
} while (!rand);
cmpxchg(&connlimit_rnd, 0, rand);
}
......
......@@ -177,7 +177,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
/* initialize hash with random val at the time we allocate
* the first hashtable entry */
if (unlikely(!ht->rnd_initialized)) {
get_random_bytes(&ht->rnd, sizeof(ht->rnd));
ht->rnd = prandom_u32();
ht->rnd_initialized = true;
}
......
/* Kernel module to match IPComp parameters for IPv4 and IPv6
*
* Copyright (C) 2013 WindRiver
*
* Author:
* Fan Du <fan.du@windriver.com>
*
* Based on:
* net/netfilter/xt_esp.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/netfilter/xt_ipcomp.h>
#include <linux/netfilter/x_tables.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Fan Du <fan.du@windriver.com>");
MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match");
/* Returns 1 if the spi is matched by the range, 0 otherwise */
static inline bool
spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
{
bool r;
pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
invert ? '!' : ' ', min, spi, max);
r = (spi >= min && spi <= max) ^ invert;
pr_debug(" result %s\n", r ? "PASS" : "FAILED");
return r;
}
static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
struct ip_comp_hdr _comphdr;
const struct ip_comp_hdr *chdr;
const struct xt_ipcomp *compinfo = par->matchinfo;
/* Must not be a fragment. */
if (par->fragoff != 0)
return false;
chdr = skb_header_pointer(skb, par->thoff, sizeof(_comphdr), &_comphdr);
if (chdr == NULL) {
/* We've been asked to examine this packet, and we
* can't. Hence, no choice but to drop.
*/
pr_debug("Dropping evil IPComp tinygram.\n");
par->hotdrop = true;
return 0;
}
return spi_match(compinfo->spis[0], compinfo->spis[1],
ntohl(chdr->cpi << 16),
!!(compinfo->invflags & XT_IPCOMP_INV_SPI));
}
static int comp_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_ipcomp *compinfo = par->matchinfo;
/* Must specify no unknown invflags */
if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) {
pr_err("unknown flags %X\n", compinfo->invflags);
return -EINVAL;
}
return 0;
}
static struct xt_match comp_mt_reg[] __read_mostly = {
{
.name = "ipcomp",
.family = NFPROTO_IPV4,
.match = comp_mt,
.matchsize = sizeof(struct xt_ipcomp),
.proto = IPPROTO_COMP,
.checkentry = comp_mt_check,
.me = THIS_MODULE,
},
{
.name = "ipcomp",
.family = NFPROTO_IPV6,
.match = comp_mt,
.matchsize = sizeof(struct xt_ipcomp),
.proto = IPPROTO_COMP,
.checkentry = comp_mt_check,
.me = THIS_MODULE,
},
};
static int __init comp_mt_init(void)
{
return xt_register_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg));
}
static void __exit comp_mt_exit(void)
{
xt_unregister_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg));
}
module_init(comp_mt_init);
module_exit(comp_mt_exit);
......@@ -334,7 +334,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
size_t sz;
if (unlikely(!hash_rnd_inited)) {
get_random_bytes(&hash_rnd, sizeof(hash_rnd));
hash_rnd = prandom_u32();
hash_rnd_inited = true;
}
if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
......
......@@ -444,6 +444,7 @@ config NET_CLS_FLOW
config NET_CLS_CGROUP
tristate "Control Group Classifier"
select NET_CLS
select CGROUP_NET_CLASSID
depends on CGROUPS
---help---
Say Y here if you want to classify packets based on the control
......
......@@ -11,109 +11,13 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/cgroup.h>
#include <linux/rcupdate.h>
#include <linux/fdtable.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
#include <net/cls_cgroup.h>
static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
{
return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
}
static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
{
return css_cls_state(task_css(p, net_cls_subsys_id));
}
static struct cgroup_subsys_state *
cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct cgroup_cls_state *cs;
cs = kzalloc(sizeof(*cs), GFP_KERNEL);
if (!cs)
return ERR_PTR(-ENOMEM);
return &cs->css;
}
static int cgrp_css_online(struct cgroup_subsys_state *css)
{
struct cgroup_cls_state *cs = css_cls_state(css);
struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
if (parent)
cs->classid = parent->classid;
return 0;
}
static void cgrp_css_free(struct cgroup_subsys_state *css)
{
kfree(css_cls_state(css));
}
static int update_classid(const void *v, struct file *file, unsigned n)
{
int err;
struct socket *sock = sock_from_file(file, &err);
if (sock)
sock->sk->sk_classid = (u32)(unsigned long)v;
return 0;
}
static void cgrp_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
struct task_struct *p;
struct cgroup_cls_state *cs = css_cls_state(css);
void *v = (void *)(unsigned long)cs->classid;
cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
iterate_fd(p->files, 0, update_classid, v);
task_unlock(p);
}
}
static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
{
return css_cls_state(css)->classid;
}
static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
u64 value)
{
css_cls_state(css)->classid = (u32) value;
return 0;
}
static struct cftype ss_files[] = {
{
.name = "classid",
.read_u64 = read_classid,
.write_u64 = write_classid,
},
{ } /* terminate */
};
struct cgroup_subsys net_cls_subsys = {
.name = "net_cls",
.css_alloc = cgrp_css_alloc,
.css_online = cgrp_css_online,
.css_free = cgrp_css_free,
.attach = cgrp_attach,
.subsys_id = net_cls_subsys_id,
.base_cftypes = ss_files,
.module = THIS_MODULE,
};
struct cls_cgroup_head {
u32 handle;
struct tcf_exts exts;
......@@ -305,25 +209,12 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
static int __init init_cgroup_cls(void)
{
int ret;
ret = cgroup_load_subsys(&net_cls_subsys);
if (ret)
goto out;
ret = register_tcf_proto_ops(&cls_cgroup_ops);
if (ret)
cgroup_unload_subsys(&net_cls_subsys);
out:
return ret;
return register_tcf_proto_ops(&cls_cgroup_ops);
}
static void __exit exit_cgroup_cls(void)
{
unregister_tcf_proto_ops(&cls_cgroup_ops);
cgroup_unload_subsys(&net_cls_subsys);
}
module_init(init_cgroup_cls);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment