Commit 7f8a436e authored by Joe Stringer's avatar Joe Stringer Committed by David S. Miller

openvswitch: Add conntrack action

Expose the kernel connection tracker via OVS. Userspace components can
make use of the CT action to populate the connection state (ct_state)
field for a flow. This state can be subsequently matched.

Exposed connection states are OVS_CS_F_*:
- NEW (0x01) - Beginning of a new connection.
- ESTABLISHED (0x02) - Part of an existing connection.
- RELATED (0x04) - Related to an established connection.
- INVALID (0x20) - Could not track the connection for this packet.
- REPLY_DIR (0x40) - This packet is in the reply direction for the flow.
- TRACKED (0x80) - This packet has been sent through conntrack.

When the CT action is executed by itself, it will send the packet
through the connection tracker and populate the ct_state field with one
or more of the connection state flags above. The CT action will always
set the TRACKED bit.

When the COMMIT flag is passed to the conntrack action, this specifies
that information about the connection should be stored. This allows
subsequent packets for the same (or related) connections to be
correlated with this connection. Sending subsequent packets for the
connection through conntrack allows the connection tracker to consider
the packets as ESTABLISHED, RELATED, and/or REPLY_DIR.

The CT action may optionally take a zone to track the flow within. This
allows connections with the same 5-tuple to be kept logically separate
from connections in other zones. If the zone is specified, then the
"ct_zone" match field will be subsequently populated with the zone id.

IP fragments are handled by transparently assembling them as part of the
CT action. The maximum received unit (MRU) size is tracked so that
refragmentation can occur during output.

IP frag handling contributed by Andy Zhou.

Based on original design by Justin Pettit.
Signed-off-by: default avatarJoe Stringer <joestringer@nicira.com>
Signed-off-by: default avatarJustin Pettit <jpettit@nicira.com>
Signed-off-by: default avatarAndy Zhou <azhou@nicira.com>
Acked-by: default avatarThomas Graf <tgraf@suug.ch>
Acked-by: default avatarPravin B Shelar <pshelar@nicira.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent e79e2595
......@@ -164,6 +164,9 @@ enum ovs_packet_cmd {
* %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the
* output port is actually a tunnel port. Contains the output tunnel key
* extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes.
* @OVS_PACKET_ATTR_MRU: Present for an %OVS_PACKET_CMD_ACTION and
* %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment
* size.
*
* These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_PACKET_* commands.
......@@ -180,6 +183,7 @@ enum ovs_packet_attr {
OVS_PACKET_ATTR_UNUSED2,
OVS_PACKET_ATTR_PROBE, /* Packet operation is a feature probe,
error logging should be suppressed. */
OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */
__OVS_PACKET_ATTR_MAX
};
......@@ -319,6 +323,8 @@ enum ovs_key_attr {
OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls.
* The implementation may restrict
* the accepted length of the array. */
OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */
OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */
#ifdef __KERNEL__
OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */
......@@ -431,6 +437,15 @@ struct ovs_key_nd {
__u8 nd_tll[ETH_ALEN];
};
/* OVS_KEY_ATTR_CT_STATE flags */
#define OVS_CS_F_NEW 0x01 /* Beginning of a new connection. */
#define OVS_CS_F_ESTABLISHED 0x02 /* Part of an existing connection. */
#define OVS_CS_F_RELATED 0x04 /* Related to an established
* connection. */
#define OVS_CS_F_INVALID 0x20 /* Could not track connection. */
#define OVS_CS_F_REPLY_DIR 0x40 /* Flow is in the reply direction. */
#define OVS_CS_F_TRACKED 0x80 /* Conntrack has occurred. */
/**
* enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
* @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
......@@ -594,6 +609,28 @@ struct ovs_action_hash {
uint32_t hash_basis;
};
/**
* enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action.
* @OVS_CT_ATTR_FLAGS: u32 connection tracking flags.
* @OVS_CT_ATTR_ZONE: u16 connection tracking zone.
*/
enum ovs_ct_attr {
OVS_CT_ATTR_UNSPEC,
OVS_CT_ATTR_FLAGS, /* u8 bitmask of OVS_CT_F_*. */
OVS_CT_ATTR_ZONE, /* u16 zone id. */
__OVS_CT_ATTR_MAX
};
#define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1)
/*
* OVS_CT_ATTR_FLAGS flags - bitmask of %OVS_CT_F_*
* @OVS_CT_F_COMMIT: Commits the flow to the conntrack table. This allows
* future packets for the same connection to be identified as 'established'
* or 'related'.
*/
#define OVS_CT_F_COMMIT 0x01
/**
* enum ovs_action_attr - Action types.
*
......@@ -623,6 +660,8 @@ struct ovs_action_hash {
* indicate the new packet contents. This could potentially still be
* %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
* is no MPLS label stack, as determined by ethertype, no action is taken.
* @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related
* entries in the flow key.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
......@@ -648,6 +687,7 @@ enum ovs_action_attr {
* data immediately followed by a mask.
* The data must be zero for the unmasked
* bits. */
OVS_ACTION_ATTR_CT, /* One nested OVS_CT_ATTR_* . */
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
* from userspace. */
......
......@@ -31,6 +31,17 @@ config OPENVSWITCH
If unsure, say N.
config OPENVSWITCH_CONNTRACK
bool "Open vSwitch conntrack action support"
depends on OPENVSWITCH
depends on NF_CONNTRACK
default OPENVSWITCH
---help---
If you say Y here, then Open vSwitch module will be able to pass
packets through conntrack.
Say N to exclude this support and reduce the binary size.
config OPENVSWITCH_GRE
tristate "Open vSwitch GRE tunneling support"
depends on OPENVSWITCH
......
......@@ -15,6 +15,8 @@ openvswitch-y := \
vport-internal_dev.o \
vport-netdev.o
openvswitch-$(CONFIG_OPENVSWITCH_CONNTRACK) += conntrack.o
obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o
obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o
......@@ -22,6 +22,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/netfilter_ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
......@@ -29,6 +30,7 @@
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/checksum.h>
......@@ -38,6 +40,7 @@
#include "datapath.h"
#include "flow.h"
#include "conntrack.h"
#include "vport.h"
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
......@@ -52,6 +55,20 @@ struct deferred_action {
struct sw_flow_key pkt_key;
};
#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN)
struct ovs_frag_data {
unsigned long dst;
struct vport *vport;
struct ovs_skb_cb cb;
__be16 inner_protocol;
__u16 vlan_tci;
__be16 vlan_proto;
unsigned int l2_len;
u8 l2_data[MAX_L2_LEN];
};
static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
#define DEFERRED_ACTION_FIFO_SIZE 10
struct action_fifo {
int head;
......@@ -602,14 +619,145 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
static int ovs_vport_output(struct sock *sock, struct sk_buff *skb)
{
struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
struct vport *vport = data->vport;
if (skb_cow_head(skb, data->l2_len) < 0) {
kfree_skb(skb);
return -ENOMEM;
}
__skb_dst_copy(skb, data->dst);
*OVS_CB(skb) = data->cb;
skb->inner_protocol = data->inner_protocol;
skb->vlan_tci = data->vlan_tci;
skb->vlan_proto = data->vlan_proto;
/* Reconstruct the MAC header. */
skb_push(skb, data->l2_len);
memcpy(skb->data, &data->l2_data, data->l2_len);
ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len);
skb_reset_mac_header(skb);
ovs_vport_send(vport, skb);
return 0;
}
static unsigned int
ovs_dst_get_mtu(const struct dst_entry *dst)
{
return dst->dev->mtu;
}
static struct dst_ops ovs_dst_ops = {
.family = AF_UNSPEC,
.mtu = ovs_dst_get_mtu,
};
/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
* ovs_vport_output(), which is called once per fragmented packet.
*/
static void prepare_frag(struct vport *vport, struct sk_buff *skb)
{
unsigned int hlen = skb_network_offset(skb);
struct ovs_frag_data *data;
data = this_cpu_ptr(&ovs_frag_data_storage);
data->dst = skb->_skb_refdst;
data->vport = vport;
data->cb = *OVS_CB(skb);
data->inner_protocol = skb->inner_protocol;
data->vlan_tci = skb->vlan_tci;
data->vlan_proto = skb->vlan_proto;
data->l2_len = hlen;
memcpy(&data->l2_data, skb->data, hlen);
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
skb_pull(skb, hlen);
}
static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
__be16 ethertype)
{
if (skb_network_offset(skb) > MAX_L2_LEN) {
OVS_NLERR(1, "L2 header too long to fragment");
return;
}
if (ethertype == htons(ETH_P_IP)) {
struct dst_entry ovs_dst;
unsigned long orig_dst;
prepare_frag(vport, skb);
dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
ovs_dst.dev = vport->dev;
orig_dst = skb->_skb_refdst;
skb_dst_set_noref(skb, &ovs_dst);
IPCB(skb)->frag_max_size = mru;
ip_do_fragment(skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
} else if (ethertype == htons(ETH_P_IPV6)) {
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
unsigned long orig_dst;
struct rt6_info ovs_rt;
if (!v6ops) {
kfree_skb(skb);
return;
}
prepare_frag(vport, skb);
memset(&ovs_rt, 0, sizeof(ovs_rt));
dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
ovs_rt.dst.dev = vport->dev;
orig_dst = skb->_skb_refdst;
skb_dst_set_noref(skb, &ovs_rt.dst);
IP6CB(skb)->frag_max_size = mru;
v6ops->fragment(skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
} else {
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
ovs_vport_name(vport), ntohs(ethertype), mru,
vport->dev->mtu);
kfree_skb(skb);
}
}
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
struct sw_flow_key *key)
{
struct vport *vport = ovs_vport_rcu(dp, out_port);
if (likely(vport))
if (likely(vport)) {
u16 mru = OVS_CB(skb)->mru;
if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
ovs_vport_send(vport, skb);
} else if (mru <= vport->dev->mtu) {
__be16 ethertype = key->eth.type;
if (!is_flow_key_valid(key)) {
if (eth_p_mpls(skb->protocol))
ethertype = skb->inner_protocol;
else
ethertype = vlan_get_protocol(skb);
}
ovs_fragment(vport, skb, mru, ethertype);
} else {
kfree_skb(skb);
}
} else {
kfree_skb(skb);
}
}
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
......@@ -623,6 +771,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_ACTION;
upcall.mru = OVS_CB(skb)->mru;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
......@@ -816,6 +965,11 @@ static int execute_masked_set_action(struct sk_buff *skb,
err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
__be32 *));
break;
case OVS_KEY_ATTR_CT_STATE:
case OVS_KEY_ATTR_CT_ZONE:
err = -EINVAL;
break;
}
return err;
......@@ -885,7 +1039,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
if (out_skb)
do_output(dp, out_skb, prev_port);
do_output(dp, out_skb, prev_port, key);
prev_port = -1;
}
......@@ -942,6 +1096,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, key, a, attr, len);
break;
case OVS_ACTION_ATTR_CT:
err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
nla_data(a));
/* Hide stolen IP fragments from user space. */
if (err == -EINPROGRESS)
return 0;
break;
}
if (unlikely(err)) {
......@@ -951,7 +1114,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
}
if (prev_port != -1)
do_output(dp, skb, prev_port);
do_output(dp, skb, prev_port, key);
else
consume_skb(skb);
......
This diff is collapsed.
/*
* Copyright (c) 2015 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef OVS_CONNTRACK_H
#define OVS_CONNTRACK_H 1
#include "flow.h"
struct ovs_conntrack_info;
enum ovs_key_attr;
#if defined(CONFIG_OPENVSWITCH_CONNTRACK)
bool ovs_ct_verify(enum ovs_key_attr attr);
int ovs_ct_copy_action(struct net *, const struct nlattr *,
const struct sw_flow_key *, struct sw_flow_actions **,
bool log);
int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
const struct ovs_conntrack_info *);
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
void ovs_ct_free_action(const struct nlattr *a);
#else
#include <linux/errno.h>
static inline bool ovs_ct_verify(int attr)
{
return false;
}
static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla,
const struct sw_flow_key *key,
struct sw_flow_actions **acts, bool log)
{
return -ENOTSUPP;
}
static inline int ovs_ct_action_to_attr(const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
return -ENOTSUPP;
}
static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
struct sw_flow_key *key,
const struct ovs_conntrack_info *info)
{
return -ENOTSUPP;
}
static inline void ovs_ct_fill_key(const struct sk_buff *skb,
struct sw_flow_key *key)
{
key->ct.state = 0;
key->ct.zone = 0;
}
static inline int ovs_ct_put_key(const struct sw_flow_key *key,
struct sk_buff *skb)
{
return 0;
}
static inline void ovs_ct_free_action(const struct nlattr *a) { }
#endif
#endif /* ovs_conntrack.h */
......@@ -275,6 +275,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall);
if (unlikely(error))
kfree_skb(skb);
......@@ -400,9 +401,23 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
if (upcall_info->actions_len)
size += nla_total_size(upcall_info->actions_len);
/* OVS_PACKET_ATTR_MRU */
if (upcall_info->mru)
size += nla_total_size(sizeof(upcall_info->mru));
return size;
}
static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
size_t plen = NLA_ALIGN(skb->len) - skb->len;
if (plen > 0)
memset(skb_put(skb, plen), 0, plen);
}
}
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info)
......@@ -492,6 +507,16 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
nla_nest_cancel(user_skb, nla);
}
/* Add OVS_PACKET_ATTR_MRU */
if (upcall_info->mru) {
if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
upcall_info->mru)) {
err = -ENOBUFS;
goto out;
}
pad_packet(dp, user_skb);
}
/* Only reserve room for attribute header, packet data is added
* in skb_zerocopy() */
if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
......@@ -505,12 +530,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
goto out;
/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
if (plen > 0)
memset(skb_put(user_skb, plen), 0, plen);
}
pad_packet(dp, user_skb);
((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
......@@ -527,6 +547,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
struct ovs_header *ovs_header = info->userhdr;
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct sw_flow_actions *acts;
struct sk_buff *packet;
......@@ -535,6 +556,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct ethhdr *eth;
struct vport *input_vport;
u16 mru = 0;
int len;
int err;
bool log = !a[OVS_PACKET_ATTR_PROBE];
......@@ -564,6 +586,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
else
packet->protocol = htons(ETH_P_802_2);
/* Set packet's mru */
if (a[OVS_PACKET_ATTR_MRU]) {
mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
packet->ignore_df = 1;
}
OVS_CB(packet)->mru = mru;
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
......@@ -575,7 +604,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
if (err)
goto err_flow_free;
err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
&flow->key, &acts, log);
if (err)
goto err_flow_free;
......@@ -586,7 +615,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
packet->mark = flow->key.phy.skb_mark;
rcu_read_lock();
dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
dp = get_dp_rcu(net, ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
goto err_unlock;
......@@ -598,6 +627,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
if (!input_vport)
goto err_unlock;
packet->dev = input_vport->dev;
OVS_CB(packet)->input_vport = input_vport;
sf_acts = rcu_dereference(flow->sf_acts);
......@@ -624,6 +654,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
};
static const struct genl_ops dp_packet_genl_ops[] = {
......@@ -880,6 +911,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow *flow = NULL, *new_flow;
......@@ -929,8 +961,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_kfree_flow;
/* Validate actions. */
error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
&acts, log);
error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
&new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
goto err_kfree_flow;
......@@ -944,7 +976,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
dp = get_dp(net, ovs_header->dp_ifindex);
if (unlikely(!dp)) {
error = -ENODEV;
goto err_unlock_ovs;
......@@ -1038,7 +1070,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
static struct sw_flow_actions *get_flow_actions(struct net *net,
const struct nlattr *a,
const struct sw_flow_key *key,
const struct sw_flow_mask *mask,
bool log)
......@@ -1048,7 +1081,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
int error;
ovs_flow_mask_key(&masked_key, key, mask);
error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
if (error) {
OVS_NLERR(log,
"Actions may not be safe on all matching packets");
......@@ -1060,6 +1093,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow_key key;
......@@ -1091,8 +1125,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
/* Validate actions. */
if (a[OVS_FLOW_ATTR_ACTIONS]) {
acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
log);
acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
&mask, log);
if (IS_ERR(acts)) {
error = PTR_ERR(acts);
goto error;
......@@ -1108,7 +1142,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
}
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
dp = get_dp(net, ovs_header->dp_ifindex);
if (unlikely(!dp)) {
error = -ENODEV;
goto err_unlock_ovs;
......
......@@ -27,6 +27,7 @@
#include <linux/u64_stats_sync.h>
#include <net/ip_tunnels.h>
#include "conntrack.h"
#include "flow.h"
#include "flow_table.h"
#include "vport.h"
......@@ -97,10 +98,13 @@ struct datapath {
* NULL if the packet is not being tunneled.
* @input_vport: The original vport packet came in on. This value is cached
* when a packet is received by OVS.
* @mru: The maximum received fragement size; 0 if the packet is not
* fragmented.
*/
struct ovs_skb_cb {
struct ip_tunnel_info *egress_tun_info;
struct vport *input_vport;
u16 mru;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
......@@ -113,6 +117,7 @@ struct ovs_skb_cb {
* then no packet is sent and the packet is accounted in the datapath's @n_lost
* counter.
* @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
* @mru: If not zero, Maximum received IP fragment size.
*/
struct dp_upcall_info {
const struct ip_tunnel_info *egress_tun_info;
......@@ -121,6 +126,7 @@ struct dp_upcall_info {
int actions_len;
u32 portid;
u8 cmd;
u16 mru;
};
/**
......
......@@ -49,6 +49,7 @@
#include "datapath.h"
#include "flow.h"
#include "flow_netlink.h"
#include "conntrack.h"
u64 ovs_flow_used_time(unsigned long flow_jiffies)
{
......@@ -707,6 +708,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->phy.priority = skb->priority;
key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
key->phy.skb_mark = skb->mark;
ovs_ct_fill_key(skb, key);
key->ovs_flow_hash = 0;
key->recirc_id = 0;
......
......@@ -111,6 +111,12 @@ struct sw_flow_key {
} nd;
} ipv6;
};
struct {
/* Connection tracking fields. */
u16 zone;
u8 state;
} ct;
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
struct sw_flow_key_range {
......
......@@ -281,7 +281,7 @@ size_t ovs_key_attr_size(void)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 24);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
......@@ -290,6 +290,8 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
+ nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
+ nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */
+ nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
......@@ -339,6 +341,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
.next = ovs_tunnel_key_lens, },
[OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
[OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) },
[OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
};
static bool is_all_zero(const u8 *fp, size_t size)
......@@ -768,6 +772,21 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
return -EINVAL;
*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
}
if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
ovs_ct_verify(OVS_KEY_ATTR_CT_STATE)) {
u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]);
SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
}
if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
ovs_ct_verify(OVS_KEY_ATTR_CT_ZONE)) {
u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
}
return 0;
}
......@@ -1266,6 +1285,7 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr,
memset(&match, 0, sizeof(match));
match.key = key;
memset(&key->ct, 0, sizeof(key->ct));
key->phy.in_port = DP_MAX_PORTS;
return metadata_from_nlattrs(&match, &attrs, a, false, log);
......@@ -1314,6 +1334,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
goto nla_put_failure;
if (ovs_ct_put_key(output, skb))
goto nla_put_failure;
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
......@@ -1574,6 +1597,9 @@ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
case OVS_ACTION_ATTR_SET:
ovs_nla_free_set_action(a);
break;
case OVS_ACTION_ATTR_CT:
ovs_ct_free_action(a);
break;
}
}
......@@ -1647,8 +1673,8 @@ static struct nlattr *__add_action(struct sw_flow_actions **sfa,
return a;
}
static int add_action(struct sw_flow_actions **sfa, int attrtype,
void *data, int len, bool log)
int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
int len, bool log)
{
struct nlattr *a;
......@@ -1663,7 +1689,7 @@ static inline int add_nested_action_start(struct sw_flow_actions **sfa,
int used = (*sfa)->actions_len;
int err;
err = add_action(sfa, attrtype, NULL, 0, log);
err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
if (err)
return err;
......@@ -1679,12 +1705,12 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
a->nla_len = sfa->actions_len - st_offset;
}
static int __ovs_nla_copy_actions(const struct nlattr *attr,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log);
static int validate_and_copy_sample(const struct nlattr *attr,
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key, int depth,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
......@@ -1716,7 +1742,7 @@ static int validate_and_copy_sample(const struct nlattr *attr,
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
if (start < 0)
return start;
err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
nla_data(probability), sizeof(u32), log);
if (err)
return err;
......@@ -1724,7 +1750,7 @@ static int validate_and_copy_sample(const struct nlattr *attr,
if (st_acts < 0)
return st_acts;
err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa,
eth_type, vlan_tci, log);
if (err)
return err;
......@@ -2058,7 +2084,7 @@ static int copy_action(const struct nlattr *from,
return 0;
}
static int __ovs_nla_copy_actions(const struct nlattr *attr,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
......@@ -2082,7 +2108,8 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
[OVS_ACTION_ATTR_SET] = (u32)-1,
[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
[OVS_ACTION_ATTR_CT] = (u32)-1,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
......@@ -2189,13 +2216,20 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
break;
case OVS_ACTION_ATTR_SAMPLE:
err = validate_and_copy_sample(a, key, depth, sfa,
err = validate_and_copy_sample(net, a, key, depth, sfa,
eth_type, vlan_tci, log);
if (err)
return err;
skip_copy = true;
break;
case OVS_ACTION_ATTR_CT:
err = ovs_ct_copy_action(net, a, key, sfa, log);
if (err)
return err;
skip_copy = true;
break;
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
......@@ -2214,7 +2248,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
}
/* 'key' must be the masked key. */
int ovs_nla_copy_actions(const struct nlattr *attr,
int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log)
{
......@@ -2225,7 +2259,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return PTR_ERR(*sfa);
(*sfa)->orig_len = nla_len(attr);
err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
key->eth.tci, log);
if (err)
ovs_nla_free_flow_actions(*sfa);
......@@ -2350,6 +2384,13 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
if (err)
return err;
break;
case OVS_ACTION_ATTR_CT:
err = ovs_ct_action_to_attr(nla_data(a), skb);
if (err)
return err;
break;
default:
if (nla_put(skb, type, nla_len(a), nla_data(a)))
return -EMSGSIZE;
......
......@@ -62,9 +62,11 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
const struct sw_flow_key *key, bool log);
u32 ovs_nla_get_ufid_flags(const struct nlattr *attr);
int ovs_nla_copy_actions(const struct nlattr *attr,
int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log);
int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype,
void *data, int len, bool log);
int ovs_nla_put_actions(const struct nlattr *attr,
int len, struct sk_buff *skb);
......
......@@ -484,6 +484,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
OVS_CB(skb)->input_vport = vport;
OVS_CB(skb)->egress_tun_info = NULL;
OVS_CB(skb)->mru = 0;
/* Extract flow from 'skb' into 'key'. */
error = ovs_flow_key_extract(tun_info, skb, &key);
if (unlikely(error)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment