Commit 9aa28f2b authored by David S. Miller's avatar David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nftables

Pablo Neira Ayuso says: <pablo@netfilter.org>

====================
nftables updates for net-next

The following patchset contains nftables updates for your net-next tree,
they are:

* Add set operation to the meta expression by means of the select_ops()
  infrastructure, this allows us to set the packet mark among other things.
  From Arturo Borrero Gonzalez.

* Fix wrong format in sscanf in nf_tables_set_alloc_name(), from Daniel
  Borkmann.

* Add new queue expression to nf_tables. These comes with two previous patches
  to prepare this new feature, one to add mask in nf_tables_core to
  evaluate the queue verdict appropriately and another to refactor common
  code with xt_NFQUEUE, from Eric Leblond.

* Do not hide nftables from Kconfig if nfnetlink is not enabled, also from
  Eric Leblond.

* Add the reject expression to nf_tables, this adds the missing TCP RST
  support. It comes with an initial patch to refactor common code with
  xt_NFQUEUE, again from Eric Leblond.

* Remove an unused variable assignment in nf_tables_dump_set(), from Michal
  Nazarewicz.

* Remove the nft_meta_target code, now that Arturo added the set operation
  to the meta expression, from me.

* Add help information for nf_tables to Kconfig, also from me.

* Allow to dump all sets by specifying NFPROTO_UNSPEC, similar feature is
  available to other nf_tables objects, requested by Arturo, from me.

* Expose the table usage counter, so we can know how many chains are using
  this table without dumping the list of chains, from Tomasz Bursztyka.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 6a8c4796 c9c8e485
#ifndef _IPV4_NF_REJECT_H
#define _IPV4_NF_REJECT_H
#include <net/ip.h>
#include <net/tcp.h>
#include <net/route.h>
#include <net/dst.h>
static inline void nf_send_unreach(struct sk_buff *skb_in, int code)
{
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}
/* Send RST reply */
static void nf_send_reset(struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
const struct iphdr *oiph;
struct iphdr *niph;
const struct tcphdr *oth;
struct tcphdr _otcph, *tcph;
/* IP header checks: fragment. */
if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
return;
oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
sizeof(_otcph), &_otcph);
if (oth == NULL)
return;
/* No RST for RST. */
if (oth->rst)
return;
if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
return;
/* Check checksum */
if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
return;
oiph = ip_hdr(oldskb);
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
LL_MAX_HEADER, GFP_ATOMIC);
if (!nskb)
return;
skb_reserve(nskb, LL_MAX_HEADER);
skb_reset_network_header(nskb);
niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
niph->version = 4;
niph->ihl = sizeof(struct iphdr) / 4;
niph->tos = 0;
niph->id = 0;
niph->frag_off = htons(IP_DF);
niph->protocol = IPPROTO_TCP;
niph->check = 0;
niph->saddr = oiph->daddr;
niph->daddr = oiph->saddr;
skb_reset_transport_header(nskb);
tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
memset(tcph, 0, sizeof(*tcph));
tcph->source = oth->dest;
tcph->dest = oth->source;
tcph->doff = sizeof(struct tcphdr) / 4;
if (oth->ack)
tcph->seq = oth->ack_seq;
else {
tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
oldskb->len - ip_hdrlen(oldskb) -
(oth->doff << 2));
tcph->ack = 1;
}
tcph->rst = 1;
tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
niph->daddr, 0);
nskb->ip_summed = CHECKSUM_PARTIAL;
nskb->csum_start = (unsigned char *)tcph - nskb->head;
nskb->csum_offset = offsetof(struct tcphdr, check);
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));
nskb->protocol = htons(ETH_P_IP);
if (ip_route_me_harder(nskb, RTN_UNSPEC))
goto free_nskb;
niph->ttl = ip4_dst_hoplimit(skb_dst(nskb));
/* "Never happens" */
if (nskb->len > dst_mtu(skb_dst(nskb)))
goto free_nskb;
nf_ct_attach(nskb, oldskb);
#ifdef CONFIG_BRIDGE_NETFILTER
/* If we use ip_local_out for bridged traffic, the MAC source on
* the RST will be ours, instead of the destination's. This confuses
* some routers/firewalls, and they drop the packet. So we need to
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
if (oldskb->nf_bridge) {
struct ethhdr *oeth = eth_hdr(oldskb);
nskb->dev = oldskb->nf_bridge->physindev;
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
oeth->h_source, oeth->h_dest, nskb->len) < 0)
goto free_nskb;
dev_queue_xmit(nskb);
} else
#endif
ip_local_out(nskb);
return;
free_nskb:
kfree_skb(nskb);
}
#endif /* _IPV4_NF_REJECT_H */
#ifndef _IPV6_NF_REJECT_H
#define _IPV6_NF_REJECT_H
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip6_fib.h>
#include <net/ip6_checksum.h>
#include <linux/netfilter_ipv6.h>
static inline void
nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code,
unsigned int hooknum)
{
if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
skb_in->dev = net->loopback_dev;
icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
}
/* Send RST reply */
static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
struct tcphdr otcph, *tcph;
unsigned int otcplen, hh_len;
int tcphoff, needs_ack;
const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
struct ipv6hdr *ip6h;
#define DEFAULT_TOS_VALUE 0x0U
const __u8 tclass = DEFAULT_TOS_VALUE;
struct dst_entry *dst = NULL;
u8 proto;
__be16 frag_off;
struct flowi6 fl6;
if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
(!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
pr_debug("addr is not unicast.\n");
return;
}
proto = oip6h->nexthdr;
tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);
if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
pr_debug("Cannot get TCP header.\n");
return;
}
otcplen = oldskb->len - tcphoff;
/* IP header checks: fragment, too short. */
if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
pr_debug("proto(%d) != IPPROTO_TCP, "
"or too short. otcplen = %d\n",
proto, otcplen);
return;
}
if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
BUG();
/* No RST for RST. */
if (otcph.rst) {
pr_debug("RST is set\n");
return;
}
/* Check checksum. */
if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) {
pr_debug("TCP checksum is invalid\n");
return;
}
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
fl6.saddr = oip6h->daddr;
fl6.daddr = oip6h->saddr;
fl6.fl6_sport = otcph.dest;
fl6.fl6_dport = otcph.source;
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
if (dst == NULL || dst->error) {
dst_release(dst);
return;
}
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
if (IS_ERR(dst))
return;
hh_len = (dst->dev->hard_header_len + 15)&~15;
nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
+ sizeof(struct tcphdr) + dst->trailer_len,
GFP_ATOMIC);
if (!nskb) {
net_dbg_ratelimited("cannot alloc skb\n");
dst_release(dst);
return;
}
skb_dst_set(nskb, dst);
skb_reserve(nskb, hh_len + dst->header_len);
skb_put(nskb, sizeof(struct ipv6hdr));
skb_reset_network_header(nskb);
ip6h = ipv6_hdr(nskb);
ip6_flow_hdr(ip6h, tclass, 0);
ip6h->hop_limit = ip6_dst_hoplimit(dst);
ip6h->nexthdr = IPPROTO_TCP;
ip6h->saddr = oip6h->daddr;
ip6h->daddr = oip6h->saddr;
skb_reset_transport_header(nskb);
tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
/* Truncate to length (no data) */
tcph->doff = sizeof(struct tcphdr)/4;
tcph->source = otcph.dest;
tcph->dest = otcph.source;
if (otcph.ack) {
needs_ack = 0;
tcph->seq = otcph.ack_seq;
tcph->ack_seq = 0;
} else {
needs_ack = 1;
tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
+ otcplen - (otcph.doff<<2));
tcph->seq = 0;
}
/* Reset flags */
((u_int8_t *)tcph)[13] = 0;
tcph->rst = 1;
tcph->ack = needs_ack;
tcph->window = 0;
tcph->urg_ptr = 0;
tcph->check = 0;
/* Adjust TCP checksum */
tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
&ipv6_hdr(nskb)->daddr,
sizeof(struct tcphdr), IPPROTO_TCP,
csum_partial(tcph,
sizeof(struct tcphdr), 0));
nf_ct_attach(nskb, oldskb);
#ifdef CONFIG_BRIDGE_NETFILTER
/* If we use ip6_local_out for bridged traffic, the MAC source on
* the RST will be ours, instead of the destination's. This confuses
* some routers/firewalls, and they drop the packet. So we need to
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
if (oldskb->nf_bridge) {
struct ethhdr *oeth = eth_hdr(oldskb);
nskb->dev = oldskb->nf_bridge->physindev;
nskb->protocol = htons(ETH_P_IPV6);
ip6h->payload_len = htons(sizeof(struct tcphdr));
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
oeth->h_source, oeth->h_dest, nskb->len) < 0)
return;
dev_queue_xmit(nskb);
} else
#endif
ip6_local_out(nskb);
}
#endif /* _IPV6_NF_REJECT_H */
#ifndef _NF_QUEUE_H
#define _NF_QUEUE_H
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jhash.h>
/* Each queued (to userspace) skbuff has one of these. */
struct nf_queue_entry {
struct list_head list;
......@@ -33,4 +37,62 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
static inline void init_hashrandom(u32 *jhash_initval)
{
while (*jhash_initval == 0)
*jhash_initval = prandom_u32();
}
static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval)
{
const struct iphdr *iph = ip_hdr(skb);
/* packets in either direction go into same queue */
if ((__force u32)iph->saddr < (__force u32)iph->daddr)
return jhash_3words((__force u32)iph->saddr,
(__force u32)iph->daddr, iph->protocol, jhash_initval);
return jhash_3words((__force u32)iph->daddr,
(__force u32)iph->saddr, iph->protocol, jhash_initval);
}
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
u32 a, b, c;
if ((__force u32)ip6h->saddr.s6_addr32[3] <
(__force u32)ip6h->daddr.s6_addr32[3]) {
a = (__force u32) ip6h->saddr.s6_addr32[3];
b = (__force u32) ip6h->daddr.s6_addr32[3];
} else {
b = (__force u32) ip6h->saddr.s6_addr32[3];
a = (__force u32) ip6h->daddr.s6_addr32[3];
}
if ((__force u32)ip6h->saddr.s6_addr32[1] <
(__force u32)ip6h->daddr.s6_addr32[1])
c = (__force u32) ip6h->saddr.s6_addr32[1];
else
c = (__force u32) ip6h->daddr.s6_addr32[1];
return jhash_3words(a, b, c, jhash_initval);
}
#endif
static inline u32
nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
u32 jhash_initval)
{
if (family == NFPROTO_IPV4)
queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32;
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
else if (family == NFPROTO_IPV6)
queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32;
#endif
return queue;
}
#endif /* _NF_QUEUE_H */
......@@ -110,11 +110,13 @@ enum nft_table_flags {
*
* @NFTA_TABLE_NAME: name of the table (NLA_STRING)
* @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
* @NFTA_TABLE_USE: number of chains in this table (NLA_U32)
*/
enum nft_table_attributes {
NFTA_TABLE_UNSPEC,
NFTA_TABLE_NAME,
NFTA_TABLE_FLAGS,
NFTA_TABLE_USE,
__NFTA_TABLE_MAX
};
#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1)
......@@ -553,11 +555,13 @@ enum nft_meta_keys {
*
* @NFTA_META_DREG: destination register (NLA_U32)
* @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
* @NFTA_META_SREG: source register (NLA_U32)
*/
enum nft_meta_attributes {
NFTA_META_UNSPEC,
NFTA_META_DREG,
NFTA_META_KEY,
NFTA_META_SREG,
__NFTA_META_MAX
};
#define NFTA_META_MAX (__NFTA_META_MAX - 1)
......@@ -657,6 +661,26 @@ enum nft_log_attributes {
};
#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1)
/**
* enum nft_queue_attributes - nf_tables queue expression netlink attributes
*
* @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
* @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
* @NFTA_QUEUE_FLAGS: various flags (NLA_U16)
*/
enum nft_queue_attributes {
NFTA_QUEUE_UNSPEC,
NFTA_QUEUE_NUM,
NFTA_QUEUE_TOTAL,
NFTA_QUEUE_FLAGS,
__NFTA_QUEUE_MAX
};
#define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1)
#define NFT_QUEUE_FLAG_BYPASS 0x01 /* for compatibility with v2 */
#define NFT_QUEUE_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */
#define NFT_QUEUE_FLAG_MASK 0x03
/**
* enum nft_reject_types - nf_tables reject expression reject types
*
......
......@@ -39,23 +39,33 @@ config NF_CONNTRACK_PROC_COMPAT
config NF_TABLES_IPV4
depends on NF_TABLES
tristate "IPv4 nf_tables support"
config NFT_REJECT_IPV4
depends on NF_TABLES_IPV4
tristate "nf_tables IPv4 reject support"
help
This option enables the IPv4 support for nf_tables.
config NFT_CHAIN_ROUTE_IPV4
depends on NF_TABLES_IPV4
tristate "IPv4 nf_tables route chain support"
help
This option enables the "route" chain for IPv4 in nf_tables. This
chain type is used to force packet re-routing after mangling header
fields such as the source, destination, type of service and
the packet mark.
config NFT_CHAIN_NAT_IPV4
depends on NF_TABLES_IPV4
depends on NF_NAT_IPV4 && NFT_NAT
tristate "IPv4 nf_tables nat chain support"
help
This option enables the "nat" chain for IPv4 in nf_tables. This
chain type is used to perform Network Address Translation (NAT)
packet transformations such as the source, destination address and
source and destination ports.
config NF_TABLES_ARP
depends on NF_TABLES
tristate "ARP nf_tables support"
help
This option enables the ARP support for nf_tables.
config IP_NF_IPTABLES
tristate "IP tables support (required for filtering/masq/NAT)"
......
......@@ -28,7 +28,6 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
......
......@@ -17,10 +17,6 @@
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/route.h>
#include <net/dst.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_REJECT.h>
......@@ -28,128 +24,12 @@
#include <linux/netfilter_bridge.h>
#endif
#include <net/netfilter/ipv4/nf_reject.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
/* Send RST reply */
static void send_reset(struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
const struct iphdr *oiph;
struct iphdr *niph;
const struct tcphdr *oth;
struct tcphdr _otcph, *tcph;
/* IP header checks: fragment. */
if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
return;
oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
sizeof(_otcph), &_otcph);
if (oth == NULL)
return;
/* No RST for RST. */
if (oth->rst)
return;
if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
return;
/* Check checksum */
if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
return;
oiph = ip_hdr(oldskb);
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
LL_MAX_HEADER, GFP_ATOMIC);
if (!nskb)
return;
skb_reserve(nskb, LL_MAX_HEADER);
skb_reset_network_header(nskb);
niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
niph->version = 4;
niph->ihl = sizeof(struct iphdr) / 4;
niph->tos = 0;
niph->id = 0;
niph->frag_off = htons(IP_DF);
niph->protocol = IPPROTO_TCP;
niph->check = 0;
niph->saddr = oiph->daddr;
niph->daddr = oiph->saddr;
skb_reset_transport_header(nskb);
tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
memset(tcph, 0, sizeof(*tcph));
tcph->source = oth->dest;
tcph->dest = oth->source;
tcph->doff = sizeof(struct tcphdr) / 4;
if (oth->ack)
tcph->seq = oth->ack_seq;
else {
tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
oldskb->len - ip_hdrlen(oldskb) -
(oth->doff << 2));
tcph->ack = 1;
}
tcph->rst = 1;
tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
niph->daddr, 0);
nskb->ip_summed = CHECKSUM_PARTIAL;
nskb->csum_start = (unsigned char *)tcph - nskb->head;
nskb->csum_offset = offsetof(struct tcphdr, check);
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));
nskb->protocol = htons(ETH_P_IP);
if (ip_route_me_harder(nskb, RTN_UNSPEC))
goto free_nskb;
niph->ttl = ip4_dst_hoplimit(skb_dst(nskb));
/* "Never happens" */
if (nskb->len > dst_mtu(skb_dst(nskb)))
goto free_nskb;
nf_ct_attach(nskb, oldskb);
#ifdef CONFIG_BRIDGE_NETFILTER
/* If we use ip_local_out for bridged traffic, the MAC source on
* the RST will be ours, instead of the destination's. This confuses
* some routers/firewalls, and they drop the packet. So we need to
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
if (oldskb->nf_bridge) {
struct ethhdr *oeth = eth_hdr(oldskb);
nskb->dev = oldskb->nf_bridge->physindev;
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
oeth->h_source, oeth->h_dest, nskb->len) < 0)
goto free_nskb;
dev_queue_xmit(nskb);
} else
#endif
ip_local_out(nskb);
return;
free_nskb:
kfree_skb(nskb);
}
static inline void send_unreach(struct sk_buff *skb_in, int code)
{
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}
static unsigned int
reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
......@@ -157,28 +37,28 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
switch (reject->with) {
case IPT_ICMP_NET_UNREACHABLE:
send_unreach(skb, ICMP_NET_UNREACH);
nf_send_unreach(skb, ICMP_NET_UNREACH);
break;
case IPT_ICMP_HOST_UNREACHABLE:
send_unreach(skb, ICMP_HOST_UNREACH);
nf_send_unreach(skb, ICMP_HOST_UNREACH);
break;
case IPT_ICMP_PROT_UNREACHABLE:
send_unreach(skb, ICMP_PROT_UNREACH);
nf_send_unreach(skb, ICMP_PROT_UNREACH);
break;
case IPT_ICMP_PORT_UNREACHABLE:
send_unreach(skb, ICMP_PORT_UNREACH);
nf_send_unreach(skb, ICMP_PORT_UNREACH);
break;
case IPT_ICMP_NET_PROHIBITED:
send_unreach(skb, ICMP_NET_ANO);
nf_send_unreach(skb, ICMP_NET_ANO);
break;
case IPT_ICMP_HOST_PROHIBITED:
send_unreach(skb, ICMP_HOST_ANO);
nf_send_unreach(skb, ICMP_HOST_ANO);
break;
case IPT_ICMP_ADMIN_PROHIBITED:
send_unreach(skb, ICMP_PKT_FILTERED);
nf_send_unreach(skb, ICMP_PKT_FILTERED);
break;
case IPT_TCP_RESET:
send_reset(skb, par->hooknum);
nf_send_reset(skb, par->hooknum);
case IPT_ICMP_ECHOREPLY:
/* Doesn't happen. */
break;
......
......@@ -28,15 +28,27 @@ config NF_CONNTRACK_IPV6
config NF_TABLES_IPV6
depends on NF_TABLES
tristate "IPv6 nf_tables support"
help
This option enables the IPv6 support for nf_tables.
config NFT_CHAIN_ROUTE_IPV6
depends on NF_TABLES_IPV6
tristate "IPv6 nf_tables route chain support"
help
This option enables the "route" chain for IPv6 in nf_tables. This
chain type is used to force packet re-routing after mangling header
fields such as the source, destination, flowlabel, hop-limit and
the packet mark.
config NFT_CHAIN_NAT_IPV6
depends on NF_TABLES_IPV6
depends on NF_NAT_IPV6 && NFT_NAT
tristate "IPv6 nf_tables nat chain support"
help
This option enables the "nat" chain for IPv6 in nf_tables. This
chain type is used to perform Network Address Translation (NAT)
packet transformations such as the source, destination address and
source and destination ports.
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
......
......@@ -23,181 +23,18 @@
#include <linux/skbuff.h>
#include <linux/icmpv6.h>
#include <linux/netdevice.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/ip6_checksum.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/flow.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter_ipv6/ip6t_REJECT.h>
#include <net/netfilter/ipv6/nf_reject.h>
MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
MODULE_LICENSE("GPL");
/* Send RST reply */
static void send_reset(struct net *net, struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
struct tcphdr otcph, *tcph;
unsigned int otcplen, hh_len;
int tcphoff, needs_ack;
const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
struct ipv6hdr *ip6h;
#define DEFAULT_TOS_VALUE 0x0U
const __u8 tclass = DEFAULT_TOS_VALUE;
struct dst_entry *dst = NULL;
u8 proto;
__be16 frag_off;
struct flowi6 fl6;
if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
(!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
pr_debug("addr is not unicast.\n");
return;
}
proto = oip6h->nexthdr;
tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);
if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
pr_debug("Cannot get TCP header.\n");
return;
}
otcplen = oldskb->len - tcphoff;
/* IP header checks: fragment, too short. */
if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
pr_debug("proto(%d) != IPPROTO_TCP, "
"or too short. otcplen = %d\n",
proto, otcplen);
return;
}
if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
BUG();
/* No RST for RST. */
if (otcph.rst) {
pr_debug("RST is set\n");
return;
}
/* Check checksum. */
if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) {
pr_debug("TCP checksum is invalid\n");
return;
}
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
fl6.saddr = oip6h->daddr;
fl6.daddr = oip6h->saddr;
fl6.fl6_sport = otcph.dest;
fl6.fl6_dport = otcph.source;
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
if (dst == NULL || dst->error) {
dst_release(dst);
return;
}
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
if (IS_ERR(dst))
return;
hh_len = (dst->dev->hard_header_len + 15)&~15;
nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
+ sizeof(struct tcphdr) + dst->trailer_len,
GFP_ATOMIC);
if (!nskb) {
net_dbg_ratelimited("cannot alloc skb\n");
dst_release(dst);
return;
}
skb_dst_set(nskb, dst);
skb_reserve(nskb, hh_len + dst->header_len);
skb_put(nskb, sizeof(struct ipv6hdr));
skb_reset_network_header(nskb);
ip6h = ipv6_hdr(nskb);
ip6_flow_hdr(ip6h, tclass, 0);
ip6h->hop_limit = ip6_dst_hoplimit(dst);
ip6h->nexthdr = IPPROTO_TCP;
ip6h->saddr = oip6h->daddr;
ip6h->daddr = oip6h->saddr;
skb_reset_transport_header(nskb);
tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
/* Truncate to length (no data) */
tcph->doff = sizeof(struct tcphdr)/4;
tcph->source = otcph.dest;
tcph->dest = otcph.source;
if (otcph.ack) {
needs_ack = 0;
tcph->seq = otcph.ack_seq;
tcph->ack_seq = 0;
} else {
needs_ack = 1;
tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
+ otcplen - (otcph.doff<<2));
tcph->seq = 0;
}
/* Reset flags */
((u_int8_t *)tcph)[13] = 0;
tcph->rst = 1;
tcph->ack = needs_ack;
tcph->window = 0;
tcph->urg_ptr = 0;
tcph->check = 0;
/* Adjust TCP checksum */
tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
&ipv6_hdr(nskb)->daddr,
sizeof(struct tcphdr), IPPROTO_TCP,
csum_partial(tcph,
sizeof(struct tcphdr), 0));
nf_ct_attach(nskb, oldskb);
#ifdef CONFIG_BRIDGE_NETFILTER
/* If we use ip6_local_out for bridged traffic, the MAC source on
* the RST will be ours, instead of the destination's. This confuses
* some routers/firewalls, and they drop the packet. So we need to
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
if (oldskb->nf_bridge) {
struct ethhdr *oeth = eth_hdr(oldskb);
nskb->dev = oldskb->nf_bridge->physindev;
nskb->protocol = htons(ETH_P_IPV6);
ip6h->payload_len = htons(sizeof(struct tcphdr));
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
oeth->h_source, oeth->h_dest, nskb->len) < 0)
return;
dev_queue_xmit(nskb);
} else
#endif
ip6_local_out(nskb);
}
static inline void
send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
unsigned int hooknum)
{
if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
skb_in->dev = net->loopback_dev;
icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
}
static unsigned int
reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
......@@ -208,25 +45,25 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
pr_debug("%s: medium point\n", __func__);
switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE:
send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum);
nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
break;
case IP6T_ICMP6_ADM_PROHIBITED:
send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
break;
case IP6T_ICMP6_NOT_NEIGHBOUR:
send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
break;
case IP6T_ICMP6_ADDR_UNREACH:
send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
break;
case IP6T_ICMP6_PORT_UNREACH:
send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
break;
case IP6T_ICMP6_ECHOREPLY:
/* Do nothing */
break;
case IP6T_TCP_RESET:
send_reset(net, skb, par->hooknum);
nf_send_reset6(net, skb, par->hooknum);
break;
default:
net_info_ratelimited("case %u not handled yet\n", reject->with);
......
......@@ -414,47 +414,104 @@ config NETFILTER_SYNPROXY
endif # NF_CONNTRACK
config NF_TABLES
depends on NETFILTER_NETLINK
select NETFILTER_NETLINK
tristate "Netfilter nf_tables support"
help
nftables is the new packet classification framework that intends to
replace the existing {ip,ip6,arp,eb}_tables infrastructure. It
provides a pseudo-state machine with an extensible instruction-set
(also known as expressions) that the userspace 'nft' utility
(http://www.netfilter.org/projects/nftables) uses to build the
rule-set. It also comes with the generic set infrastructure that
allows you to construct mappings between matchings and actions
for performance lookups.
To compile it as a module, choose M here.
config NFT_EXTHDR
depends on NF_TABLES
tristate "Netfilter nf_tables IPv6 exthdr module"
help
This option adds the "exthdr" expression that you can use to match
IPv6 extension headers.
config NFT_META
depends on NF_TABLES
tristate "Netfilter nf_tables meta module"
help
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
config NFT_CT
depends on NF_TABLES
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
help
This option adds the "meta" expression that you can use to match
connection tracking information such as the flow state.
config NFT_RBTREE
depends on NF_TABLES
tristate "Netfilter nf_tables rbtree set module"
help
This option adds the "rbtree" set type (Red Black tree) that is used
to build interval-based sets.
config NFT_HASH
depends on NF_TABLES
tristate "Netfilter nf_tables hash set module"
help
This option adds the "hash" set type that is used to build one-way
mappings between matchings and actions.
config NFT_COUNTER
depends on NF_TABLES
tristate "Netfilter nf_tables counter module"
help
This option adds the "counter" expression that you can use to
include packet and byte counters in a rule.
config NFT_LOG
depends on NF_TABLES
tristate "Netfilter nf_tables log module"
help
This option adds the "log" expression that you can use to log
packets matching some criteria.
config NFT_LIMIT
depends on NF_TABLES
tristate "Netfilter nf_tables limit module"
help
This option adds the "limit" expression that you can use to
ratelimit rule matchings.
config NFT_NAT
depends on NF_TABLES
depends on NF_CONNTRACK
depends on NF_NAT
tristate "Netfilter nf_tables nat module"
help
This option adds the "nat" expression that you can use to perform
typical Network Address Translation (NAT) packet transformations.
config NFT_QUEUE
depends on NF_TABLES
depends on NETFILTER_XTABLES
depends on NETFILTER_NETLINK_QUEUE
tristate "Netfilter nf_tables queue module"
help
This is required if you intend to use the userspace queueing
infrastructure (also known as NFQUEUE) from nftables.
config NFT_REJECT
depends on NF_TABLES
depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
help
This option adds the "reject" expression that you can use to
explicitly deny and notify via TCP reset/ICMP informational errors
unallowed traffic.
config NFT_COMPAT
depends on NF_TABLES
......
......@@ -76,7 +76,8 @@ obj-$(CONFIG_NFT_META) += nft_meta.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
#nf_tables-objs += nft_meta_target.o
obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
......
......@@ -180,7 +180,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
nfmsg->res_id = 0;
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)))
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
goto nla_put_failure;
return nlmsg_end(skb, nlh);
......@@ -1923,12 +1924,14 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
{
struct net *net = sock_net(skb->sk);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_af_info *afi;
const struct nft_af_info *afi = NULL;
const struct nft_table *table = NULL;
if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
if (IS_ERR(afi))
return PTR_ERR(afi);
}
if (nla[NFTA_SET_TABLE] != NULL) {
table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
......@@ -1973,11 +1976,14 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
return -ENOMEM;
list_for_each_entry(i, &ctx->table->sets, list) {
if (!sscanf(i->name, name, &n))
int tmp;
if (!sscanf(i->name, name, &tmp))
continue;
if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE)
if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE)
continue;
set_bit(n, inuse);
set_bit(tmp, inuse);
}
n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE);
......@@ -2094,7 +2100,7 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
return skb->len;
}
static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nft_set *set;
......@@ -2127,6 +2133,61 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
return skb->len;
}
static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nft_set *set;
unsigned int idx, s_idx = cb->args[0];
const struct nft_af_info *afi;
struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
struct net *net = sock_net(skb->sk);
int cur_family = cb->args[3];
if (cb->args[1])
return skb->len;
list_for_each_entry(afi, &net->nft.af_info, list) {
if (cur_family) {
if (afi->family != cur_family)
continue;
cur_family = 0;
}
list_for_each_entry(table, &afi->tables, list) {
if (cur_table) {
if (cur_table != table)
continue;
cur_table = NULL;
}
ctx->table = table;
ctx->afi = afi;
idx = 0;
list_for_each_entry(set, &ctx->table->sets, list) {
if (idx < s_idx)
goto cont;
if (nf_tables_fill_set(skb, ctx, set,
NFT_MSG_NEWSET,
NLM_F_MULTI) < 0) {
cb->args[0] = idx;
cb->args[2] = (unsigned long) table;
cb->args[3] = afi->family;
goto done;
}
cont:
idx++;
}
if (s_idx)
s_idx = 0;
}
}
cb->args[1] = 1;
done:
return skb->len;
}
static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
......@@ -2143,9 +2204,12 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
return err;
if (ctx.table == NULL)
if (ctx.table == NULL) {
if (ctx.afi == NULL)
ret = nf_tables_dump_sets_all(&ctx, skb, cb);
else
ret = nf_tables_dump_sets_family(&ctx, skb, cb);
} else
ret = nf_tables_dump_sets_table(&ctx, skb, cb);
return ret;
......@@ -2158,6 +2222,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
const struct nft_set *set;
struct nft_ctx ctx;
struct sk_buff *skb2;
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
int err;
/* Verify existance before starting dump */
......@@ -2172,6 +2237,10 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
return netlink_dump_start(nlsk, skb, nlh, &c);
}
/* Only accept unspec with dump */
if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
return -EAFNOSUPPORT;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
......@@ -2341,6 +2410,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_set *set;
struct nft_ctx ctx;
int err;
......@@ -2352,6 +2422,9 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
return err;
if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
return -EAFNOSUPPORT;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
......@@ -2521,9 +2594,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
u32 portid, seq;
int event, err;
nfmsg = nlmsg_data(cb->nlh);
err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX,
nft_set_elem_list_policy);
err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla,
NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy);
if (err < 0)
return err;
......
......@@ -164,7 +164,7 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
break;
}
switch (data[NFT_REG_VERDICT].verdict) {
switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
......@@ -172,6 +172,9 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
return data[NFT_REG_VERDICT].verdict;
}
switch (data[NFT_REG_VERDICT].verdict) {
case NFT_JUMP:
if (unlikely(pkt->skb->nf_trace))
nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
......
......@@ -21,10 +21,13 @@
struct nft_meta {
enum nft_meta_keys key:8;
union {
enum nft_registers dreg:8;
enum nft_registers sreg:8;
};
};
static void nft_meta_eval(const struct nft_expr *expr,
static void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_data data[NFT_REG_MAX + 1],
const struct nft_pktinfo *pkt)
{
......@@ -132,23 +135,50 @@ static void nft_meta_eval(const struct nft_expr *expr,
data[NFT_REG_VERDICT].verdict = NFT_BREAK;
}
static void nft_meta_set_eval(const struct nft_expr *expr,
struct nft_data data[NFT_REG_MAX + 1],
const struct nft_pktinfo *pkt)
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
u32 value = data[meta->sreg].data[0];
switch (meta->key) {
case NFT_META_MARK:
skb->mark = value;
break;
case NFT_META_PRIORITY:
skb->priority = value;
break;
case NFT_META_NFTRACE:
skb->nf_trace = 1;
break;
default:
WARN_ON(1);
}
}
static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
[NFTA_META_DREG] = { .type = NLA_U32 },
[NFTA_META_KEY] = { .type = NLA_U32 },
[NFTA_META_SREG] = { .type = NLA_U32 },
};
static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[])
static int nft_meta_init_validate_set(uint32_t key)
{
struct nft_meta *priv = nft_expr_priv(expr);
int err;
if (tb[NFTA_META_DREG] == NULL ||
tb[NFTA_META_KEY] == NULL)
return -EINVAL;
switch (key) {
case NFT_META_MARK:
case NFT_META_PRIORITY:
case NFT_META_NFTRACE:
return 0;
default:
return -EOPNOTSUPP;
}
}
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (priv->key) {
static int nft_meta_init_validate_get(uint32_t key)
{
switch (key) {
case NFT_META_LEN:
case NFT_META_PROTOCOL:
case NFT_META_PRIORITY:
......@@ -167,26 +197,69 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
#endif
break;
return 0;
default:
return -EOPNOTSUPP;
}
}
static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
if (tb[NFTA_META_DREG]) {
err = nft_meta_init_validate_get(priv->key);
if (err < 0)
return err;
priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
err = nft_validate_output_register(priv->dreg);
if (err < 0)
return err;
return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
return nft_validate_data_load(ctx, priv->dreg, NULL,
NFT_DATA_VALUE);
}
err = nft_meta_init_validate_set(priv->key);
if (err < 0)
return err;
priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG]));
return 0;
}
static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
static int nft_meta_get_dump(struct sk_buff *skb,
const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg)))
goto nla_put_failure;
return 0;
nla_put_failure:
return -1;
}
static int nft_meta_set_dump(struct sk_buff *skb,
const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_META_SREG, htonl(priv->sreg)))
goto nla_put_failure;
return 0;
nla_put_failure:
......@@ -194,17 +267,44 @@ static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
}
static struct nft_expr_type nft_meta_type;
static const struct nft_expr_ops nft_meta_ops = {
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_eval,
.eval = nft_meta_get_eval,
.init = nft_meta_init,
.dump = nft_meta_dump,
.dump = nft_meta_get_dump,
};
static const struct nft_expr_ops nft_meta_set_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
.init = nft_meta_init,
.dump = nft_meta_set_dump,
};
static const struct nft_expr_ops *
nft_meta_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
if (tb[NFTA_META_KEY] == NULL)
return ERR_PTR(-EINVAL);
if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
return ERR_PTR(-EINVAL);
if (tb[NFTA_META_DREG])
return &nft_meta_get_ops;
if (tb[NFTA_META_SREG])
return &nft_meta_set_ops;
return ERR_PTR(-EINVAL);
}
static struct nft_expr_type nft_meta_type __read_mostly = {
.name = "meta",
.ops = &nft_meta_ops,
.select_ops = &nft_meta_select_ops,
.policy = nft_meta_policy,
.maxattr = NFTA_META_MAX,
.owner = THIS_MODULE,
......
/*
* Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
struct nft_meta {
enum nft_meta_keys key;
};
static void nft_meta_eval(const struct nft_expr *expr,
struct nft_data *nfres,
struct nft_data *data,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
u32 val = data->data[0];
switch (meta->key) {
case NFT_META_MARK:
skb->mark = val;
break;
case NFT_META_PRIORITY:
skb->priority = val;
break;
case NFT_META_NFTRACE:
skb->nf_trace = val;
break;
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
skb->secmark = val;
break;
#endif
default:
WARN_ON(1);
}
}
static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
[NFTA_META_KEY] = { .type = NLA_U32 },
};
static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[])
{
struct nft_meta *meta = nft_expr_priv(expr);
if (tb[NFTA_META_KEY] == NULL)
return -EINVAL;
meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (meta->key) {
case NFT_META_MARK:
case NFT_META_PRIORITY:
case NFT_META_NFTRACE:
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
#endif
break;
default:
return -EINVAL;
}
return 0;
}
static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_meta *meta = nft_expr_priv(expr);
NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key));
return 0;
nla_put_failure:
return -1;
}
static struct nft_expr_ops meta_target __read_mostly = {
.name = "meta",
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.owner = THIS_MODULE,
.eval = nft_meta_eval,
.init = nft_meta_init,
.dump = nft_meta_dump,
.policy = nft_meta_policy,
.maxattr = NFTA_META_MAX,
};
static int __init nft_meta_target_init(void)
{
return nft_register_expr(&meta_target);
}
static void __exit nft_meta_target_exit(void)
{
nft_unregister_expr(&meta_target);
}
module_init(nft_meta_target_init);
module_exit(nft_meta_target_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("meta");
/*
* Copyright (c) 2013 Eric Leblond <eric@regit.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Development of this code partly funded by OISF
* (http://www.openinfosecfoundation.org/)
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/jhash.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_queue.h>
static u32 jhash_initval __read_mostly;
struct nft_queue {
u16 queuenum;
u16 queues_total;
u16 flags;
u8 family;
};
static void nft_queue_eval(const struct nft_expr *expr,
struct nft_data data[NFT_REG_MAX + 1],
const struct nft_pktinfo *pkt)
{
struct nft_queue *priv = nft_expr_priv(expr);
u32 queue = priv->queuenum;
u32 ret;
if (priv->queues_total > 1) {
if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) {
int cpu = smp_processor_id();
queue = priv->queuenum + cpu % priv->queues_total;
} else {
queue = nfqueue_hash(pkt->skb, queue,
priv->queues_total, priv->family,
jhash_initval);
}
}
ret = NF_QUEUE_NR(queue);
if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
data[NFT_REG_VERDICT].verdict = ret;
}
static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
[NFTA_QUEUE_NUM] = { .type = NLA_U16 },
[NFTA_QUEUE_TOTAL] = { .type = NLA_U16 },
[NFTA_QUEUE_FLAGS] = { .type = NLA_U16 },
};
static int nft_queue_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_queue *priv = nft_expr_priv(expr);
if (tb[NFTA_QUEUE_NUM] == NULL)
return -EINVAL;
init_hashrandom(&jhash_initval);
priv->family = ctx->afi->family;
priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));
if (tb[NFTA_QUEUE_TOTAL] != NULL)
priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL]));
if (tb[NFTA_QUEUE_FLAGS] != NULL) {
priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
return -EINVAL;
}
return 0;
}
static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_queue *priv = nft_expr_priv(expr);
if (nla_put_be16(skb, NFTA_QUEUE_NUM, htons(priv->queuenum)) ||
nla_put_be16(skb, NFTA_QUEUE_TOTAL, htons(priv->queues_total)) ||
nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags)))
goto nla_put_failure;
return 0;
nla_put_failure:
return -1;
}
static struct nft_expr_type nft_queue_type;
static const struct nft_expr_ops nft_queue_ops = {
.type = &nft_queue_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_queue)),
.eval = nft_queue_eval,
.init = nft_queue_init,
.dump = nft_queue_dump,
};
static struct nft_expr_type nft_queue_type __read_mostly = {
.name = "queue",
.ops = &nft_queue_ops,
.policy = nft_queue_policy,
.maxattr = NFTA_QUEUE_MAX,
.owner = THIS_MODULE,
};
static int __init nft_queue_module_init(void)
{
return nft_register_expr(&nft_queue_type);
}
static void __exit nft_queue_module_exit(void)
{
nft_unregister_expr(&nft_queue_type);
}
module_init(nft_queue_module_init);
module_exit(nft_queue_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Leblond <eric@regit.org>");
MODULE_ALIAS_NFT_EXPR("queue");
/*
* Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
* Copyright (c) 2013 Eric Leblond <eric@regit.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
......@@ -16,10 +17,16 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/icmp.h>
#include <net/netfilter/ipv4/nf_reject.h>
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
#include <net/netfilter/ipv6/nf_reject.h>
#endif
struct nft_reject {
enum nft_reject_types type:8;
u8 icmp_code;
u8 family;
};
static void nft_reject_eval(const struct nft_expr *expr,
......@@ -27,12 +34,25 @@ static void nft_reject_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0);
if (priv->family == NFPROTO_IPV4)
nf_send_unreach(pkt->skb, priv->icmp_code);
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
else if (priv->family == NFPROTO_IPV6)
nf_send_unreach6(net, pkt->skb, priv->icmp_code,
pkt->hooknum);
#endif
break;
case NFT_REJECT_TCP_RST:
if (priv->family == NFPROTO_IPV4)
nf_send_reset(pkt->skb, pkt->hooknum);
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
else if (priv->family == NFPROTO_IPV6)
nf_send_reset6(net, pkt->skb, pkt->hooknum);
#endif
break;
}
......@@ -53,6 +73,7 @@ static int nft_reject_init(const struct nft_ctx *ctx,
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
priv->family = ctx->afi->family;
priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
......
......@@ -11,15 +11,13 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jhash.h>
#include <linux/netfilter.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_NFQUEUE.h>
#include <net/netfilter/nf_queue.h>
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("Xtables: packet forwarding to netlink");
MODULE_LICENSE("GPL");
......@@ -28,7 +26,6 @@ MODULE_ALIAS("ip6t_NFQUEUE");
MODULE_ALIAS("arpt_NFQUEUE");
static u32 jhash_initval __read_mostly;
static bool rnd_inited __read_mostly;
static unsigned int
nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par)
......@@ -38,69 +35,16 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par)
return NF_QUEUE_NR(tinfo->queuenum);
}
static u32 hash_v4(const struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
/* packets in either direction go into same queue */
if ((__force u32)iph->saddr < (__force u32)iph->daddr)
return jhash_3words((__force u32)iph->saddr,
(__force u32)iph->daddr, iph->protocol, jhash_initval);
return jhash_3words((__force u32)iph->daddr,
(__force u32)iph->saddr, iph->protocol, jhash_initval);
}
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static u32 hash_v6(const struct sk_buff *skb)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
u32 a, b, c;
if ((__force u32)ip6h->saddr.s6_addr32[3] <
(__force u32)ip6h->daddr.s6_addr32[3]) {
a = (__force u32) ip6h->saddr.s6_addr32[3];
b = (__force u32) ip6h->daddr.s6_addr32[3];
} else {
b = (__force u32) ip6h->saddr.s6_addr32[3];
a = (__force u32) ip6h->daddr.s6_addr32[3];
}
if ((__force u32)ip6h->saddr.s6_addr32[1] <
(__force u32)ip6h->daddr.s6_addr32[1])
c = (__force u32) ip6h->saddr.s6_addr32[1];
else
c = (__force u32) ip6h->daddr.s6_addr32[1];
return jhash_3words(a, b, c, jhash_initval);
}
#endif
static u32
nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_NFQ_info_v1 *info = par->targinfo;
u32 queue = info->queuenum;
if (par->family == NFPROTO_IPV4)
queue += ((u64) hash_v4(skb) * info->queues_total) >> 32;
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
else if (par->family == NFPROTO_IPV6)
queue += ((u64) hash_v6(skb) * info->queues_total) >> 32;
#endif
return queue;
}
static unsigned int
nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_NFQ_info_v1 *info = par->targinfo;
u32 queue = info->queuenum;
if (info->queues_total > 1)
queue = nfqueue_hash(skb, par);
if (info->queues_total > 1) {
queue = nfqueue_hash(skb, queue, info->queues_total,
par->family, jhash_initval);
}
return NF_QUEUE_NR(queue);
}
......@@ -120,10 +64,8 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)
const struct xt_NFQ_info_v3 *info = par->targinfo;
u32 maxid;
if (unlikely(!rnd_inited)) {
get_random_bytes(&jhash_initval, sizeof(jhash_initval));
rnd_inited = true;
}
init_hashrandom(&jhash_initval);
if (info->queues_total == 0) {
pr_err("NFQUEUE: number of total queues is 0\n");
return -EINVAL;
......@@ -154,8 +96,10 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
int cpu = smp_processor_id();
queue = info->queuenum + cpu % info->queues_total;
} else
queue = nfqueue_hash(skb, par);
} else {
queue = nfqueue_hash(skb, queue, info->queues_total,
par->family, jhash_initval);
}
}
ret = NF_QUEUE_NR(queue);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment