Commit 16b25d1a authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

This batch contains Netfilter updates for net-next:

1) Add nft_setelem_parse_key() helper function.

2) Add NFTA_SET_ELEM_KEY_END to specify a range with one single element.

3) Add NFTA_SET_DESC_CONCAT to describe the set element concatenation,
   from Stefano Brivio.

4) Add bitmap_cut() to copy n-bits from source to destination,
   from Stefano Brivio.

5) Add set to match on arbitrary concatenations, from Stefano Brivio.

6) Add selftest for this new set type. An extract of Stefano's
   description follows:

"Existing nftables set implementations allow matching entries with
interval expressions (rbtree), e.g. 192.0.2.1-192.0.2.4, entries
specifying field concatenation (hash, rhash), e.g. 192.0.2.1:22,
but not both.

In other words, none of the set types allows matching on range
expressions for more than one packet field at a time, such as ipset
does with types bitmap:ip,mac, and, to a more limited extent
(netmasks, not arbitrary ranges), with types hash:net,net,
hash:net,port, hash:ip,port,net, and hash:net,port,net.

As a pure hash-based approach is unsuitable for matching on ranges,
and "proxying" the existing red-black tree type looks impractical as
elements would need to be shared and managed across all employed
trees, this new set implementation intends to fill the functionality
gap by employing a relatively novel approach.

The fundamental idea, illustrated in deeper detail in patch 5/9, is to
use lookup tables classifying a small number of grouped bits from each
field, and map the lookup results in a way that yields a verdict for
the full set of specified fields.

The grouping bit aspect is loosely inspired by the Grouper algorithm,
by Jay Ligatti, Josh Kuhn, and Chris Gage (see patch 5/9 for the full
reference).

A reference, stand-alone implementation of the algorithm itself is
available at:
        https://pipapo.lameexcu.se

Some notes about possible future optimisations are also mentioned
there. This algorithm reduces the matching problem to, essentially,
a repetitive sequence of simple bitwise operations, and is
particularly suitable to be optimised by leveraging SIMD instruction
sets."
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 32efcc06 611973c1
......@@ -53,6 +53,7 @@
* bitmap_find_next_zero_area_off(buf, len, pos, n, mask) as above
* bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
* bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest
* bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask)
* bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src)
* bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit)
......@@ -133,6 +134,9 @@ extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src,
unsigned int shift, unsigned int nbits);
extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src,
unsigned int shift, unsigned int nbits);
extern void bitmap_cut(unsigned long *dst, const unsigned long *src,
unsigned int first, unsigned int cut,
unsigned int nbits);
extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
......
......@@ -231,6 +231,7 @@ struct nft_userdata {
* struct nft_set_elem - generic representation of set elements
*
* @key: element key
* @key_end: closing element key
* @priv: element private data and extensions
*/
struct nft_set_elem {
......@@ -238,6 +239,10 @@ struct nft_set_elem {
u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
struct nft_data val;
} key;
union {
u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
struct nft_data val;
} key_end;
void *priv;
};
......@@ -259,11 +264,15 @@ struct nft_set_iter {
* @klen: key length
* @dlen: data length
* @size: number of set elements
* @field_len: length of each field in concatenation, bytes
* @field_count: number of concatenated fields in element
*/
struct nft_set_desc {
unsigned int klen;
unsigned int dlen;
unsigned int size;
u8 field_len[NFT_REG32_COUNT];
u8 field_count;
};
/**
......@@ -404,6 +413,8 @@ void nft_unregister_set(struct nft_set_type *type);
* @dtype: data type (verdict or numeric type defined by userspace)
* @objtype: object type (see NFT_OBJECT_* definitions)
* @size: maximum set size
* @field_len: length of each field in concatenation, bytes
* @field_count: number of concatenated fields in element
* @use: number of rules references to this set
* @nelems: number of elements
* @ndeact: number of deactivated elements queued for removal
......@@ -430,6 +441,8 @@ struct nft_set {
u32 dtype;
u32 objtype;
u32 size;
u8 field_len[NFT_REG32_COUNT];
u8 field_count;
u32 use;
atomic_t nelems;
u32 ndeact;
......@@ -502,6 +515,7 @@ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
* enum nft_set_extensions - set extension type IDs
*
* @NFT_SET_EXT_KEY: element key
* @NFT_SET_EXT_KEY_END: upper bound element key, for ranges
* @NFT_SET_EXT_DATA: mapping data
* @NFT_SET_EXT_FLAGS: element flags
* @NFT_SET_EXT_TIMEOUT: element timeout
......@@ -513,6 +527,7 @@ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
*/
enum nft_set_extensions {
NFT_SET_EXT_KEY,
NFT_SET_EXT_KEY_END,
NFT_SET_EXT_DATA,
NFT_SET_EXT_FLAGS,
NFT_SET_EXT_TIMEOUT,
......@@ -606,6 +621,11 @@ static inline struct nft_data *nft_set_ext_key(const struct nft_set_ext *ext)
return nft_set_ext(ext, NFT_SET_EXT_KEY);
}
static inline struct nft_data *nft_set_ext_key_end(const struct nft_set_ext *ext)
{
return nft_set_ext(ext, NFT_SET_EXT_KEY_END);
}
static inline struct nft_data *nft_set_ext_data(const struct nft_set_ext *ext)
{
return nft_set_ext(ext, NFT_SET_EXT_DATA);
......@@ -655,7 +675,7 @@ static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext)
void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *data,
const u32 *key, const u32 *key_end, const u32 *data,
u64 timeout, u64 expiration, gfp_t gfp);
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr);
......
......@@ -74,6 +74,7 @@ extern struct nft_set_type nft_set_hash_type;
extern struct nft_set_type nft_set_hash_fast_type;
extern struct nft_set_type nft_set_rbtree_type;
extern struct nft_set_type nft_set_bitmap_type;
extern struct nft_set_type nft_set_pipapo_type;
struct nft_expr;
struct nft_regs;
......
......@@ -48,6 +48,7 @@ enum nft_registers {
#define NFT_REG_SIZE 16
#define NFT_REG32_SIZE 4
#define NFT_REG32_COUNT (NFT_REG32_15 - NFT_REG32_00 + 1)
/**
* enum nft_verdicts - nf_tables internal verdicts
......@@ -301,14 +302,28 @@ enum nft_set_policies {
* enum nft_set_desc_attributes - set element description
*
* @NFTA_SET_DESC_SIZE: number of elements in set (NLA_U32)
* @NFTA_SET_DESC_CONCAT: description of field concatenation (NLA_NESTED)
*/
enum nft_set_desc_attributes {
NFTA_SET_DESC_UNSPEC,
NFTA_SET_DESC_SIZE,
NFTA_SET_DESC_CONCAT,
__NFTA_SET_DESC_MAX
};
#define NFTA_SET_DESC_MAX (__NFTA_SET_DESC_MAX - 1)
/**
* enum nft_set_field_attributes - attributes of concatenated fields
*
* @NFTA_SET_FIELD_LEN: length of single field, in bits (NLA_U32)
*/
enum nft_set_field_attributes {
NFTA_SET_FIELD_UNSPEC,
NFTA_SET_FIELD_LEN,
__NFTA_SET_FIELD_MAX
};
#define NFTA_SET_FIELD_MAX (__NFTA_SET_FIELD_MAX - 1)
/**
* enum nft_set_attributes - nf_tables set netlink attributes
*
......@@ -370,6 +385,7 @@ enum nft_set_elem_flags {
* @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY)
* @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes)
* @NFTA_SET_ELEM_OBJREF: stateful object reference (NLA_STRING)
* @NFTA_SET_ELEM_KEY_END: closing key value (NLA_NESTED: nft_data)
*/
enum nft_set_elem_attributes {
NFTA_SET_ELEM_UNSPEC,
......@@ -382,6 +398,7 @@ enum nft_set_elem_attributes {
NFTA_SET_ELEM_EXPR,
NFTA_SET_ELEM_PAD,
NFTA_SET_ELEM_OBJREF,
NFTA_SET_ELEM_KEY_END,
__NFTA_SET_ELEM_MAX
};
#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1)
......
......@@ -168,6 +168,72 @@ void __bitmap_shift_left(unsigned long *dst, const unsigned long *src,
}
EXPORT_SYMBOL(__bitmap_shift_left);
/**
* bitmap_cut() - remove bit region from bitmap and right shift remaining bits
* @dst: destination bitmap, might overlap with src
* @src: source bitmap
* @first: start bit of region to be removed
* @cut: number of bits to remove
* @nbits: bitmap size, in bits
*
* Set the n-th bit of @dst iff the n-th bit of @src is set and
* n is less than @first, or the m-th bit of @src is set for any
* m such that @first <= n < nbits, and m = n + @cut.
*
* In pictures, example for a big-endian 32-bit architecture:
*
* @src:
* 31 63
* | |
* 10000000 11000001 11110010 00010101 10000000 11000001 01110010 00010101
* | | | |
* 16 14 0 32
*
* if @cut is 3, and @first is 14, bits 14-16 in @src are cut and @dst is:
*
* 31 63
* | |
* 10110000 00011000 00110010 00010101 00010000 00011000 00101110 01000010
* | | |
* 14 (bit 17 0 32
* from @src)
*
* Note that @dst and @src might overlap partially or entirely.
*
* This is implemented in the obvious way, with a shift and carry
* step for each moved bit. Optimisation is left as an exercise
* for the compiler.
*/
void bitmap_cut(unsigned long *dst, const unsigned long *src,
unsigned int first, unsigned int cut, unsigned int nbits)
{
unsigned int len = BITS_TO_LONGS(nbits);
unsigned long keep = 0, carry;
int i;
memmove(dst, src, len * sizeof(*dst));
if (first % BITS_PER_LONG) {
keep = src[first / BITS_PER_LONG] &
(~0UL >> (BITS_PER_LONG - first % BITS_PER_LONG));
}
while (cut--) {
for (i = first / BITS_PER_LONG; i < len; i++) {
if (i < len - 1)
carry = dst[i + 1] & 1UL;
else
carry = 0;
dst[i] = (dst[i] >> 1) | (carry << (BITS_PER_LONG - 1));
}
}
dst[first / BITS_PER_LONG] &= ~0UL << (first % BITS_PER_LONG);
dst[first / BITS_PER_LONG] |= keep;
}
EXPORT_SYMBOL(bitmap_cut);
int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits)
{
......
......@@ -81,7 +81,8 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nft_chain_route.o nf_tables_offload.o
nf_tables_set-objs := nf_tables_set_core.o \
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
nft_set_pipapo.o
obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NF_TABLES_SET) += nf_tables_set.o
......
This diff is collapsed.
......@@ -9,12 +9,14 @@ static int __init nf_tables_set_module_init(void)
nft_register_set(&nft_set_rhash_type);
nft_register_set(&nft_set_bitmap_type);
nft_register_set(&nft_set_rbtree_type);
nft_register_set(&nft_set_pipapo_type);
return 0;
}
static void __exit nf_tables_set_module_exit(void)
{
nft_unregister_set(&nft_set_pipapo_type);
nft_unregister_set(&nft_set_rbtree_type);
nft_unregister_set(&nft_set_bitmap_type);
nft_unregister_set(&nft_set_rhash_type);
......
......@@ -54,7 +54,7 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
timeout = priv->timeout ? : set->timeout;
elem = nft_set_elem_init(set, &priv->tmpl,
&regs->data[priv->sreg_key],
&regs->data[priv->sreg_key], NULL,
&regs->data[priv->sreg_data],
timeout, 0, GFP_ATOMIC);
if (elem == NULL)
......
This diff is collapsed.
......@@ -466,6 +466,9 @@ static void nft_rbtree_destroy(const struct nft_set *set)
static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
struct nft_set_estimate *est)
{
if (desc->field_count > 1)
return false;
if (desc->size)
est->size = sizeof(struct nft_rbtree) +
desc->size * sizeof(struct nft_rbtree_elem);
......
......@@ -2,6 +2,7 @@
# Makefile for netfilter selftests
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh
include ../lib.mk
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment