Commit 910714f1 authored by David S. Miller's avatar David S. Miller

Merge branch 'ovs-ct-zone'

Yi-Hung Wei says:

====================
openvswitch: Support conntrack zone limit

Currently, nf_conntrack_max is used to limit the maximum number of
conntrack entries in the conntrack table for every network namespace.
For the VMs and containers that reside in the same namespace,
they share the same conntrack table, and the total # of conntrack entries
for all the VMs and containers are limited by nf_conntrack_max.  In this
case, if one of the VM/container abuses the usage the conntrack entries,
it blocks the others from committing valid conntrack entries into the
conntrack table.  Even if we can possibly put the VM in different network
namespace, the current nf_conntrack_max configuration is kind of rigid
that we cannot limit different VM/container to have different # conntrack
entries.

To address the aforementioned issue, this patch proposes to have a
fine-grained mechanism that could further limit the # of conntrack entries
per-zone.  For example, we can designate different zone to different VM,
and set conntrack limit to each zone.  By providing this isolation, a
mis-behaved VM only consumes the conntrack entries in its own zone, and
it will not influence other well-behaved VMs.  Moreover, the users can
set various conntrack limit to different zone based on their preference.

The proposed implementation utilizes Netfilter's nf_conncount backend
to count the number of connections in a particular zone.  If the number of
connection is above a configured limitation, OVS will return ENOMEM to the
userspace.  If userspace does not configure the zone limit, the limit
defaults to zero that is no limitation, which is backward compatible to
the behavior without this patch.

The first patch defines the conntrack limit netlink definition, and the
second patch provides the implementation.

v4->v5:
  - Addresses comments from Parvin that include log error msg in
    ovs_ct_limit_init(), handle deletion for default limit, and
    add a common helper for get zone limit.
  - Rebases to master.

v3->v4:
  - Addresses comments from Parvin that include simplify netlink API,
    and remove unncessary RCU lockings.
  - Rebases to master.

v2->v3:
  - Addresses comments from Parvin that include using static keys to check
    if ovs_ct_limit features is used, only check ct_limit when a ct entry
    is unconfirmed, and reports rate limited warning messages when the ct
    limit is reached.
  - Rebases to master.

v1->v2:
  - Fixes commit log typos suggested by Greg.
  - Fixes memory free issue that Julia found.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents d7c52fc8 11efd5cb
......@@ -937,4 +937,32 @@ enum ovs_meter_band_type {
#define OVS_METER_BAND_TYPE_MAX (__OVS_METER_BAND_TYPE_MAX - 1)
/* Conntrack limit */
#define OVS_CT_LIMIT_FAMILY "ovs_ct_limit"
#define OVS_CT_LIMIT_MCGROUP "ovs_ct_limit"
#define OVS_CT_LIMIT_VERSION 0x1
enum ovs_ct_limit_cmd {
OVS_CT_LIMIT_CMD_UNSPEC,
OVS_CT_LIMIT_CMD_SET, /* Add or modify ct limit. */
OVS_CT_LIMIT_CMD_DEL, /* Delete ct limit. */
OVS_CT_LIMIT_CMD_GET /* Get ct limit. */
};
enum ovs_ct_limit_attr {
OVS_CT_LIMIT_ATTR_UNSPEC,
OVS_CT_LIMIT_ATTR_ZONE_LIMIT, /* Nested struct ovs_zone_limit. */
__OVS_CT_LIMIT_ATTR_MAX
};
#define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
#define OVS_ZONE_LIMIT_DEFAULT_ZONE -1
struct ovs_zone_limit {
int zone_id;
__u32 limit;
__u32 count;
};
#endif /* _LINUX_OPENVSWITCH_H */
......@@ -9,7 +9,8 @@ config OPENVSWITCH
(NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
(!NF_NAT || NF_NAT) && \
(!NF_NAT_IPV4 || NF_NAT_IPV4) && \
(!NF_NAT_IPV6 || NF_NAT_IPV6)))
(!NF_NAT_IPV6 || NF_NAT_IPV6) && \
(!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT)))
select LIBCRC32C
select MPLS
select NET_MPLS_GSO
......
This diff is collapsed.
......@@ -17,10 +17,11 @@
#include "flow.h"
struct ovs_conntrack_info;
struct ovs_ct_limit_info;
enum ovs_key_attr;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
void ovs_ct_init(struct net *);
int ovs_ct_init(struct net *);
void ovs_ct_exit(struct net *);
bool ovs_ct_verify(struct net *, enum ovs_key_attr attr);
int ovs_ct_copy_action(struct net *, const struct nlattr *,
......@@ -44,7 +45,7 @@ void ovs_ct_free_action(const struct nlattr *a);
#else
#include <linux/errno.h>
static inline void ovs_ct_init(struct net *net) { }
static inline int ovs_ct_init(struct net *net) { return 0; }
static inline void ovs_ct_exit(struct net *net) { }
......@@ -104,4 +105,8 @@ static inline void ovs_ct_free_action(const struct nlattr *a) { }
#define CT_SUPPORTED_MASK 0
#endif /* CONFIG_NF_CONNTRACK */
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
extern struct genl_family dp_ct_limit_genl_family;
#endif
#endif /* ovs_conntrack.h */
......@@ -2288,6 +2288,9 @@ static struct genl_family * const dp_genl_families[] = {
&dp_flow_genl_family,
&dp_packet_genl_family,
&dp_meter_genl_family,
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
&dp_ct_limit_genl_family,
#endif
};
static void dp_unregister_genl(int n_families)
......@@ -2323,8 +2326,7 @@ static int __net_init ovs_init_net(struct net *net)
INIT_LIST_HEAD(&ovs_net->dps);
INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
ovs_ct_init(net);
return 0;
return ovs_ct_init(net);
}
static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
......@@ -2469,3 +2471,4 @@ MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
......@@ -144,6 +144,9 @@ struct dp_upcall_info {
struct ovs_net {
struct list_head dps;
struct work_struct dp_notify_work;
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_ct_limit_info *ct_limit_info;
#endif
/* Module reference for configuring conntrack. */
bool xt_label;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment