Commit 6039e3df authored by David S. Miller

Merge branch 'ovs_flowids'

Joe Stringer says:

====================
openvswitch: Introduce 128-bit unique flow identifiers.

This series extends the openvswitch datapath interface for flow commands to use
128-bit unique identifiers as an alternative to the netlink-formatted flow key.
This significantly reduces the cost of assembling messages between the kernel
and userspace, in particular improving Open vSwitch revalidation performance by
40% or more.

v14:
- Perform lookup using unmasked key in legacy case.
- Fix minor checkpatch.pl style violations.

v13:
- Embed sw_flow_id in sw_flow to save memory allocation in UFID case.
- Malloc unmasked key for id in non-UFID case.
- Fix bug where non-UFID case could double-serialize keys.

v12:
- Userspace patches fully merged into Open vSwitch master
- New minor refactor patches (2,3,4)
- Merge unmasked_key, ufid representation of flow identifier in sw_flow
- Improve memory allocation sizes when serializing ufid
- Handle corner case where a flow_new is requested with a flow that has an
  identical ufid as an existing flow, but a different flow key
- Limit UFID to between 1-16 octets inclusive.
- Add various helper functions to improve readability

v11:
- Pushed most of the prerequisite patches for this series to OVS master.
- Split out openvswitch.h interface changes from datapath implementation
- Datapath implementation to be reviewed on net-next, separately

v10:
- New patch allowing datapath to serialize masked keys
- Simplify datapath interface by accepting UFID or flow_key, but not both
- Flows set up with UFID must be queried/deleted using UFID
- Reduce sw_flow memory usage for UFID
- Don't periodically rehash UFID table in linux datapath
- Remove kernel_only UFID in linux datapath

v9:
- No kernel changes

v8:
- Rename UID -> UFID
- Fix null dereference in datapath when paired with older userspace
- All patches are reviewed/acked except datapath changes.

v7:
- Remove OVS_DP_F_INDEX_BY_UID
- Rework datapath UID serialization for variable length UIDs

v6:
- Reduce netlink conversions for all datapaths
- Various bugfixes

v5:
- Various bugfixes
- Improve logging

v4:
- Datapath memory leak fixes
- Enable UID-based terse dumping and deleting by default
- Various fixes

RFCv3:
- Add datapath implementation
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 707d79e5 74ed7ab9
...@@ -131,6 +131,19 @@ performs best-effort detection of overlapping wildcarded flows and may reject ...@@ -131,6 +131,19 @@ performs best-effort detection of overlapping wildcarded flows and may reject
some but not all of them. However, this behavior may change in future versions. some but not all of them. However, this behavior may change in future versions.
Unique flow identifiers
-----------------------
An alternative to using the original match portion of a key as the handle for
flow identification is a unique flow identifier, or "UFID". UFIDs are optional
for both the kernel and user space program.
User space programs that support UFID are expected to provide it during flow
setup in addition to the flow, then refer to the flow using the UFID for all
future operations. The kernel is not required to index flows by the original
flow key if a UFID is specified.
Basic rule for evolving flow keys Basic rule for evolving flow keys
--------------------------------- ---------------------------------
......
...@@ -205,6 +205,23 @@ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr, ...@@ -205,6 +205,23 @@ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr,
NLMSG_HDRLEN); NLMSG_HDRLEN);
} }
/**
 * genlmsg_parse - parse attributes of a genetlink message
 * @nlh: netlink message header
 * @family: genetlink message family
 * @tb: destination array with maxtype+1 elements
 * @maxtype: maximum attribute type to be expected
 * @policy: validation policy
 *
 * Convenience wrapper around nlmsg_parse() that computes the header length
 * from the generic netlink header plus the family's own header size.
 * Returns whatever nlmsg_parse() returns.
 */
static inline int genlmsg_parse(const struct nlmsghdr *nlh,
				const struct genl_family *family,
				struct nlattr *tb[], int maxtype,
				const struct nla_policy *policy)
{
	/* Generic netlink header followed by the family-specific header. */
	const int hdrlen = family->hdrsize + GENL_HDRLEN;

	return nlmsg_parse(nlh, hdrlen, tb, maxtype, policy);
}
/** /**
* genl_dump_check_consistent - check if sequence is consistent and advertise if not * genl_dump_check_consistent - check if sequence is consistent and advertise if not
* @cb: netlink callback structure that stores the sequence number * @cb: netlink callback structure that stores the sequence number
......
...@@ -459,6 +459,14 @@ struct ovs_key_nd { ...@@ -459,6 +459,14 @@ struct ovs_key_nd {
* a wildcarded match. Omitting attribute is treated as wildcarding all * a wildcarded match. Omitting attribute is treated as wildcarding all
* corresponding fields. Optional for all requests. If not present, * corresponding fields. Optional for all requests. If not present,
* all flow key bits are exact match bits. * all flow key bits are exact match bits.
* @OVS_FLOW_ATTR_UFID: A value between 1-16 octets specifying a unique
* identifier for the flow. Causes the flow to be indexed by this value rather
* than the value of the %OVS_FLOW_ATTR_KEY attribute. Optional for all
* requests. Present in notifications if the flow was created with this
* attribute.
* @OVS_FLOW_ATTR_UFID_FLAGS: A 32-bit value of OR'd %OVS_UFID_F_*
* flags that provide alternative semantics for flow installation and
* retrieval. Optional for all requests.
* *
* These attributes follow the &struct ovs_header within the Generic Netlink * These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_FLOW_* commands. * payload for %OVS_FLOW_* commands.
...@@ -474,11 +482,23 @@ enum ovs_flow_attr { ...@@ -474,11 +482,23 @@ enum ovs_flow_attr {
OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */ OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */
OVS_FLOW_ATTR_PROBE, /* Flow operation is a feature probe, error OVS_FLOW_ATTR_PROBE, /* Flow operation is a feature probe, error
* logging should be suppressed. */ * logging should be suppressed. */
OVS_FLOW_ATTR_UFID, /* Variable length unique flow identifier. */
OVS_FLOW_ATTR_UFID_FLAGS,/* u32 of OVS_UFID_F_*. */
__OVS_FLOW_ATTR_MAX __OVS_FLOW_ATTR_MAX
}; };
#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) #define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
/**
* Omit attributes for notifications.
*
* If a datapath request contains an %OVS_UFID_F_OMIT_* flag, then the datapath
* may omit the corresponding %OVS_FLOW_ATTR_* from the response.
*
* The flags are single bits intended to be OR'd together into the u32 carried
* by %OVS_FLOW_ATTR_UFID_FLAGS.
*/
#define OVS_UFID_F_OMIT_KEY (1 << 0) /* Response may leave out the flow key. */
#define OVS_UFID_F_OMIT_MASK (1 << 1) /* Response may leave out the flow mask. */
#define OVS_UFID_F_OMIT_ACTIONS (1 << 2) /* Response may leave out the actions. */
/** /**
* enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
* @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
......
This diff is collapsed.
...@@ -197,6 +197,16 @@ struct sw_flow_match { ...@@ -197,6 +197,16 @@ struct sw_flow_match {
struct sw_flow_mask *mask; struct sw_flow_mask *mask;
}; };
/* Maximum length, in bytes, of a unique flow identifier (UFID). */
#define MAX_UFID_LENGTH 16 /* 128 bits */
/* Identifier of a flow: either a UFID stored inline, or a pointer to the
 * flow's unmasked key.  'ufid_len' selects which union member is valid.
 */
struct sw_flow_id {
/* UFID length in bytes; zero means the flow is identified by
 * 'unmasked_key' instead (see ovs_identifier_is_ufid()).
 */
u32 ufid_len;
union {
/* UFID bytes; only the first 'ufid_len' bytes are meaningful. */
u32 ufid[MAX_UFID_LENGTH / 4];
/* Separately allocated copy of the unmasked flow key, used when
 * no UFID was supplied (see ovs_nla_get_identifier()).
 */
struct sw_flow_key *unmasked_key;
};
};
struct sw_flow_actions { struct sw_flow_actions {
struct rcu_head rcu; struct rcu_head rcu;
u32 actions_len; u32 actions_len;
...@@ -213,13 +223,15 @@ struct flow_stats { ...@@ -213,13 +223,15 @@ struct flow_stats {
struct sw_flow { struct sw_flow {
struct rcu_head rcu; struct rcu_head rcu;
struct hlist_node hash_node[2]; struct {
u32 hash; struct hlist_node node[2];
u32 hash;
} flow_table, ufid_table;
int stats_last_writer; /* NUMA-node id of the last writer on int stats_last_writer; /* NUMA-node id of the last writer on
* 'stats[0]'. * 'stats[0]'.
*/ */
struct sw_flow_key key; struct sw_flow_key key;
struct sw_flow_key unmasked_key; struct sw_flow_id id;
struct sw_flow_mask *mask; struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts; struct sw_flow_actions __rcu *sf_acts;
struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one
...@@ -243,6 +255,16 @@ struct arp_eth_header { ...@@ -243,6 +255,16 @@ struct arp_eth_header {
unsigned char ar_tip[4]; /* target IP address */ unsigned char ar_tip[4]; /* target IP address */
} __packed; } __packed;
/* Returns true if 'sfid' holds a UFID, i.e. its UFID length is non-zero. */
static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
{
	return sfid->ufid_len != 0;
}
static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid)
{
return !ovs_identifier_is_ufid(sfid);
}
void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
const struct sk_buff *); const struct sk_buff *);
void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
......
...@@ -1180,6 +1180,59 @@ int ovs_nla_get_match(struct sw_flow_match *match, ...@@ -1180,6 +1180,59 @@ int ovs_nla_get_match(struct sw_flow_match *match,
return err; return err;
} }
/* Returns the payload length of the UFID attribute 'attr' when it lies within
 * 1..MAX_UFID_LENGTH bytes.  Returns 0 when 'attr' is NULL or when the length
 * is out of range; in the latter case an error is reported through
 * OVS_NLERR(), passing 'log' through to it.
 */
static size_t get_ufid_len(const struct nlattr *attr, bool log)
{
	size_t ufid_len;

	if (!attr)
		return 0;

	ufid_len = nla_len(attr);
	if (ufid_len >= 1 && ufid_len <= MAX_UFID_LENGTH)
		return ufid_len;

	OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
		  nla_len(attr), MAX_UFID_LENGTH);
	return 0;
}
/* Fills in 'sfid' from the UFID attribute 'attr'.
 *
 * 'sfid->ufid_len' is always written: it receives the validated UFID length,
 * or 0 when 'attr' is absent or has an invalid length.  The UFID bytes are
 * copied into 'sfid->ufid' only when the length is valid.
 *
 * Returns true if 'attr' contains a valid UFID, or false otherwise.
 */
bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
		      bool log)
{
	size_t len = get_ufid_len(attr, log);

	sfid->ufid_len = len;
	if (!len)
		return false;

	memcpy(sfid->ufid, nla_data(attr), len);
	return true;
}
/* Initializes 'sfid' as a flow identifier.
 *
 * If 'ufid' carries a valid UFID, it is stored inline in 'sfid'.  Otherwise
 * 'sfid->ufid_len' is left at 0 and 'sfid->unmasked_key' is pointed at a newly
 * kmalloc()'d copy of 'key'; this function does not free that copy.
 *
 * Returns 0 on success, or -ENOMEM if the key copy cannot be allocated.
 */
int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
			   const struct sw_flow_key *key, bool log)
{
	struct sw_flow_key *key_copy;

	/* A valid UFID is self-contained; no key copy is needed. */
	if (ovs_nla_get_ufid(sfid, ufid, log))
		return 0;

	/* If UFID was not provided, use unmasked key. */
	key_copy = kmalloc(sizeof(*key_copy), GFP_KERNEL);
	if (!key_copy)
		return -ENOMEM;

	/* Byte-wise copy so the duplicate matches 'key' exactly. */
	memcpy(key_copy, key, sizeof(*key_copy));
	sfid->unmasked_key = key_copy;

	return 0;
}
/* Returns the u32 payload of the UFID flags attribute 'attr', or 0 when the
 * attribute is absent.
 */
u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
{
	if (!attr)
		return 0;

	return nla_get_u32(attr);
}
/** /**
* ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
* @key: Receives extracted in_port, priority, tun_key and skb_mark. * @key: Receives extracted in_port, priority, tun_key and skb_mark.
...@@ -1216,12 +1269,12 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, ...@@ -1216,12 +1269,12 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr,
return metadata_from_nlattrs(&match, &attrs, a, false, log); return metadata_from_nlattrs(&match, &attrs, a, false, log);
} }
int ovs_nla_put_flow(const struct sw_flow_key *swkey, static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output, struct sk_buff *skb) const struct sw_flow_key *output, bool is_mask,
struct sk_buff *skb)
{ {
struct ovs_key_ethernet *eth_key; struct ovs_key_ethernet *eth_key;
struct nlattr *nla, *encap; struct nlattr *nla, *encap;
bool is_mask = (swkey != output);
if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
goto nla_put_failure; goto nla_put_failure;
...@@ -1431,6 +1484,49 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, ...@@ -1431,6 +1484,49 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
return -EMSGSIZE; return -EMSGSIZE;
} }
/* Serializes a flow key (or mask) into 'skb' as a nested attribute of type
 * 'attr', delegating the contents to __ovs_nla_put_key().
 *
 * Returns 0 on success, -EMSGSIZE if the nest cannot be started, or the error
 * returned by __ovs_nla_put_key().  On the latter error the nest is returned
 * unterminated; this function does not roll back what was already written.
 */
int ovs_nla_put_key(const struct sw_flow_key *swkey,
		    const struct sw_flow_key *output, int attr, bool is_mask,
		    struct sk_buff *skb)
{
	struct nlattr *nest = nla_nest_start(skb, attr);
	int rc;

	if (!nest)
		return -EMSGSIZE;

	rc = __ovs_nla_put_key(swkey, output, is_mask, skb);
	if (!rc)
		nla_nest_end(skb, nest);

	return rc;
}
/* Called with ovs_mutex or RCU read lock. */
int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
{
if (ovs_identifier_is_ufid(&flow->id))
return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
flow->id.ufid);
return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
OVS_FLOW_ATTR_KEY, false, skb);
}
/* Writes the masked key of 'flow' to 'skb', nested under OVS_FLOW_ATTR_KEY.
 *
 * The flow's own (masked) key is passed as both the reference key and the
 * output key.  This is a key serialization (is_mask == false), so the mask
 * must not be used as the reference key; ovs_nla_put_identifier() follows the
 * same convention, and only ovs_nla_put_mask() passes two different keys.
 *
 * Returns 0 on success or a negative errno from ovs_nla_put_key().
 *
 * Called with ovs_mutex or RCU read lock.
 */
int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
{
	return ovs_nla_put_key(&flow->key, &flow->key,
			       OVS_FLOW_ATTR_KEY, false, skb);
}
/* Writes the mask of 'flow' to 'skb', nested under OVS_FLOW_ATTR_MASK.  The
 * flow's key is passed as the reference key and the mask's key supplies the
 * values to output.
 *
 * Called with ovs_mutex or RCU read lock.
 */
int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
{
	const struct sw_flow_key *ref_key = &flow->key;
	const struct sw_flow_key *mask_key = &flow->mask->key;

	return ovs_nla_put_key(ref_key, mask_key, OVS_FLOW_ATTR_MASK, true,
			       skb);
}
#define MAX_ACTIONS_BUFSIZE (32 * 1024) #define MAX_ACTIONS_BUFSIZE (32 * 1024)
static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
......
...@@ -43,16 +43,25 @@ size_t ovs_key_attr_size(void); ...@@ -43,16 +43,25 @@ size_t ovs_key_attr_size(void);
void ovs_match_init(struct sw_flow_match *match, void ovs_match_init(struct sw_flow_match *match,
struct sw_flow_key *key, struct sw_flow_mask *mask); struct sw_flow_key *key, struct sw_flow_mask *mask);
int ovs_nla_put_flow(const struct sw_flow_key *, int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
const struct sw_flow_key *, struct sk_buff *); int attr, bool is_mask, struct sk_buff *);
int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *,
bool log); bool log);
int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
const struct nlattr *mask, bool log); const struct nlattr *mask, bool log);
int ovs_nla_put_egress_tunnel_key(struct sk_buff *, int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
const struct ovs_tunnel_info *); const struct ovs_tunnel_info *);
bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
const struct sw_flow_key *key, bool log);
u32 ovs_nla_get_ufid_flags(const struct nlattr *attr);
int ovs_nla_copy_actions(const struct nlattr *attr, int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key, const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log); struct sw_flow_actions **sfa, bool log);
......
This diff is collapsed.
...@@ -47,9 +47,11 @@ struct table_instance { ...@@ -47,9 +47,11 @@ struct table_instance {
struct flow_table { struct flow_table {
struct table_instance __rcu *ti; struct table_instance __rcu *ti;
struct table_instance __rcu *ufid_ti;
struct list_head mask_list; struct list_head mask_list;
unsigned long last_rehash; unsigned long last_rehash;
unsigned int count; unsigned int count;
unsigned int ufid_count;
}; };
extern struct kmem_cache *flow_stats_cache; extern struct kmem_cache *flow_stats_cache;
...@@ -78,8 +80,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, ...@@ -78,8 +80,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *); const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
const struct sw_flow_match *match); const struct sw_flow_match *match);
bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *,
const struct sw_flow_match *match); const struct sw_flow_id *);
bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *);
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
const struct sw_flow_mask *mask); const struct sw_flow_mask *mask);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment