Commit f0a40400 authored by David S. Miller

Merge branch 'ovs-L3-encap'

Jiri Benc says:

====================
openvswitch: support for layer 3 encapsulated packets

At the core of this patch set is removing the assumption in the Open vSwitch
datapath that all packets have an Ethernet header.

The implementation relies on the presence of pop_eth and push_eth actions
in datapath flows to facilitate adding and removing Ethernet headers as
appropriate. The construction of such flows is left up to user-space.

This series is based on work by Simon Horman, Lorand Jakab, Thomas Morin and
others. I kept Lorand's and Simon's s-o-b in the patches that are derived
from v11 to record their authorship of parts of the code.

Changes from v12 to v13:

* Addressed Pravin's feedback.
* Removed the GRE vport conversion patch; L3 GRE ports should be created by
  rtnetlink instead.

Main changes from v11 to v12:

* The patches were restructured and split differently for easier review.
* They were rebased and adjusted to the current net-next. Especially MPLS
  handling is different (and easier) thanks to the recent MPLS GSO rework.
* Several bugs were discovered and fixed. The most notable is fragment
  handling: header adjustment for ARPHRD_NONE devices on tx needs to be done
  after refragmentation, not before it. This required significant changes in
  the patchset. Another one is stricter checking of attributes (match on L2
  vs. L3 packet) at the kernel level.
* Instead of is_layer3 bool, a mac_proto field is used.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents c540594f 217ac77a
......@@ -705,6 +705,15 @@ enum ovs_nat_attr {
#define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1)
/*
 * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument.
 * @addresses: Source and destination MAC addresses.
 *
 * The EtherType of the pushed header is not part of this argument: the
 * datapath's push_eth() fills it in from the packet's current skb->protocol.
 */
struct ovs_action_push_eth {
struct ovs_key_ethernet addresses;
};
/**
* enum ovs_action_attr - Action types.
*
......@@ -738,6 +747,10 @@ enum ovs_nat_attr {
* is no MPLS label stack, as determined by ethertype, no action is taken.
* @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related
* entries in the flow key.
* @OVS_ACTION_ATTR_PUSH_ETH: Push a new outermost Ethernet header onto the
* packet.
* @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the
* packet.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
......@@ -765,6 +778,8 @@ enum ovs_action_attr {
* bits. */
OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */
OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */
OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */
OVS_ACTION_ATTR_POP_ETH, /* No argument. */
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
* from userspace. */
......
......@@ -66,6 +66,7 @@ struct ovs_frag_data {
u16 vlan_tci;
__be16 vlan_proto;
unsigned int l2_len;
u8 mac_proto;
u8 l2_data[MAX_L2_LEN];
};
......@@ -137,12 +138,12 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
/* Mark @key as stale so that it is re-extracted from the packet before it
 * is relied upon again.
 *
 * Staleness is recorded only as the SW_FLOW_KEY_INVALID flag in
 * key->mac_proto.  key->eth.type is deliberately left untouched:
 * ovs_fragment() still reads the ethertype from the key, so zeroing it here
 * would make every packet with an invalidated key fail fragmentation.
 */
static void invalidate_flow_key(struct sw_flow_key *key)
{
	key->mac_proto |= SW_FLOW_KEY_INVALID;
}

/* Return true when @key has not been invalidated, i.e. when the
 * SW_FLOW_KEY_INVALID flag is clear in key->mac_proto.
 *
 * This is the only validity test; the former "ethertype is non-zero" check
 * has been removed because it returned first and made this one unreachable.
 */
static bool is_flow_key_valid(const struct sw_flow_key *key)
{
	return !(key->mac_proto & SW_FLOW_KEY_INVALID);
}
static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
......@@ -186,7 +187,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET)
update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
skb->protocol = mpls->mpls_ethertype;
invalidate_flow_key(key);
......@@ -196,7 +198,6 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
const __be16 ethertype)
{
struct ethhdr *hdr;
int err;
err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
......@@ -212,11 +213,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb->mac_len);
/* mpls_hdr() is used to locate the ethertype field correctly in the
* presence of VLAN tags.
*/
hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
update_ethertype(skb, hdr, ethertype);
if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
struct ethhdr *hdr;
/* mpls_hdr() is used to locate the ethertype field correctly in the
* presence of VLAN tags.
*/
hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
update_ethertype(skb, hdr, ethertype);
}
if (eth_p_mpls(skb->protocol))
skb->protocol = ethertype;
......@@ -312,6 +317,47 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
/* pop_eth - remove the outermost Ethernet header from @skb.
 *
 * pop_eth does not support VLAN packets as this action is never called
 * for them (the action validator returns -EINVAL for
 * OVS_ACTION_ATTR_POP_ETH while VLAN_TAG_PRESENT is set in the tracked TCI).
 *
 * @skb: packet to modify; ETH_HLEN bytes are pulled from its front.
 * @key: flow key of the packet; switched to MAC_PROTO_NONE and invalidated.
 *
 * Always returns 0.
 */
static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
{
/* Pull the header first, then re-derive the MAC header offset and
 * length from the new start of data.
 */
skb_pull_rcsum(skb, ETH_HLEN);
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
/* safe right before invalidate_flow_key: the plain assignment also
 * clears SW_FLOW_KEY_INVALID, which invalidate_flow_key() sets again
 * on the next line.
 */
key->mac_proto = MAC_PROTO_NONE;
invalidate_flow_key(key);
return 0;
}
static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_action_push_eth *ethh)
{
struct ethhdr *hdr;
/* Add the new Ethernet header */
if (skb_cow_head(skb, ETH_HLEN) < 0)
return -ENOMEM;
skb_push(skb, ETH_HLEN);
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
hdr = eth_hdr(skb);
ether_addr_copy(hdr->h_source, ethh->addresses.eth_src);
ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst);
hdr->h_proto = skb->protocol;
skb_postpush_rcsum(skb, hdr, ETH_HLEN);
/* safe right before invalidate_flow_key */
key->mac_proto = MAC_PROTO_ETHERNET;
invalidate_flow_key(key);
return 0;
}
static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
__be32 addr, __be32 new_addr)
{
......@@ -673,7 +719,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk
skb_reset_mac_len(skb);
}
ovs_vport_send(vport, skb);
ovs_vport_send(vport, skb, data->mac_proto);
return 0;
}
......@@ -692,7 +738,7 @@ static struct dst_ops ovs_dst_ops = {
* ovs_vport_output(), which is called once per fragmented packet.
*/
static void prepare_frag(struct vport *vport, struct sk_buff *skb,
u16 orig_network_offset)
u16 orig_network_offset, u8 mac_proto)
{
unsigned int hlen = skb_network_offset(skb);
struct ovs_frag_data *data;
......@@ -705,6 +751,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
data->network_offset = orig_network_offset;
data->vlan_tci = skb->vlan_tci;
data->vlan_proto = skb->vlan_proto;
data->mac_proto = mac_proto;
data->l2_len = hlen;
memcpy(&data->l2_data, skb->data, hlen);
......@@ -713,7 +760,8 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
}
static void ovs_fragment(struct net *net, struct vport *vport,
struct sk_buff *skb, u16 mru, __be16 ethertype)
struct sk_buff *skb, u16 mru,
struct sw_flow_key *key)
{
u16 orig_network_offset = 0;
......@@ -727,11 +775,12 @@ static void ovs_fragment(struct net *net, struct vport *vport,
goto err;
}
if (ethertype == htons(ETH_P_IP)) {
if (key->eth.type == htons(ETH_P_IP)) {
struct dst_entry ovs_dst;
unsigned long orig_dst;
prepare_frag(vport, skb, orig_network_offset);
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
ovs_dst.dev = vport->dev;
......@@ -742,7 +791,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
} else if (ethertype == htons(ETH_P_IPV6)) {
} else if (key->eth.type == htons(ETH_P_IPV6)) {
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
unsigned long orig_dst;
struct rt6_info ovs_rt;
......@@ -751,7 +800,8 @@ static void ovs_fragment(struct net *net, struct vport *vport,
goto err;
}
prepare_frag(vport, skb, orig_network_offset);
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
memset(&ovs_rt, 0, sizeof(ovs_rt));
dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
......@@ -765,7 +815,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
refdst_drop(orig_dst);
} else {
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
ovs_vport_name(vport), ntohs(ethertype), mru,
ovs_vport_name(vport), ntohs(key->eth.type), mru,
vport->dev->mtu);
goto err;
}
......@@ -785,26 +835,19 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
u32 cutlen = OVS_CB(skb)->cutlen;
if (unlikely(cutlen > 0)) {
if (skb->len - cutlen > ETH_HLEN)
if (skb->len - cutlen > ovs_mac_header_len(key))
pskb_trim(skb, skb->len - cutlen);
else
pskb_trim(skb, ETH_HLEN);
pskb_trim(skb, ovs_mac_header_len(key));
}
if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
ovs_vport_send(vport, skb);
if (likely(!mru ||
(skb->len <= mru + vport->dev->hard_header_len))) {
ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
} else if (mru <= vport->dev->mtu) {
struct net *net = read_pnet(&dp->net);
__be16 ethertype = key->eth.type;
if (!is_flow_key_valid(key)) {
if (eth_p_mpls(skb->protocol))
ethertype = skb->inner_protocol;
else
ethertype = vlan_get_protocol(skb);
}
ovs_fragment(net, vport, skb, mru, ethertype);
ovs_fragment(net, vport, skb, mru, key);
} else {
kfree_skb(skb);
}
......@@ -1198,6 +1241,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
if (err)
return err == -EINPROGRESS ? 0 : err;
break;
case OVS_ACTION_ATTR_PUSH_ETH:
err = push_eth(skb, key, nla_data(a));
break;
case OVS_ACTION_ATTR_POP_ETH:
err = pop_eth(skb, key);
break;
}
if (unlikely(err)) {
......
......@@ -560,7 +560,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct sw_flow *flow;
struct sw_flow_actions *sf_acts;
struct datapath *dp;
struct ethhdr *eth;
struct vport *input_vport;
u16 mru = 0;
int len;
......@@ -581,17 +580,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
skb_reset_mac_header(packet);
eth = eth_hdr(packet);
/* Normally, setting the skb 'protocol' field would be handled by a
* call to eth_type_trans(), but it assumes there's a sending
* device, which we may not have. */
if (eth_proto_is_802_3(eth->h_proto))
packet->protocol = eth->h_proto;
else
packet->protocol = htons(ETH_P_802_2);
/* Set packet's mru */
if (a[OVS_PACKET_ATTR_MRU]) {
mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
......@@ -618,6 +606,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
rcu_assign_pointer(flow->sf_acts, acts);
packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark;
packet->protocol = flow->key.eth.type;
rcu_read_lock();
dp = get_dp_rcu(net, ovs_header->dp_ifindex);
......
......@@ -334,14 +334,17 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
return 1;
}
static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
/* Reset both VLAN slots of @key (outer 'vlan' and inner 'cvlan') so that
 * neither TCI nor TPID carries a value over from earlier use of the key.
 */
static void clear_vlan(struct sw_flow_key *key)
{
	key->eth.vlan.tci = 0;
	key->eth.vlan.tpid = 0;
	key->eth.cvlan.tci = 0;
	key->eth.cvlan.tpid = 0;
}
static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
int res;
if (skb_vlan_tag_present(skb)) {
key->eth.vlan.tci = htons(skb->vlan_tci);
......@@ -483,17 +486,20 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
*
* Returns 0 if successful, otherwise a negative errno value.
*
* Initializes @skb header pointers as follows:
* Initializes @skb header fields as follows:
*
* - skb->mac_header: the Ethernet header.
* - skb->mac_header: the L2 header.
*
* - skb->network_header: just past the Ethernet header, or just past the
* VLAN header, to the first byte of the Ethernet payload.
* - skb->network_header: just past the L2 header, or just past the
* VLAN header, to the first byte of the L2 payload.
*
* - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
* on output, then just past the IP header, if one is present and
* of a correct length, otherwise the same as skb->network_header.
* For other key->eth.type values it is left untouched.
*
* - skb->protocol: the type of the data starting at skb->network_header.
* Equal to key->eth.type.
*/
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
......@@ -505,28 +511,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
skb_reset_mac_header(skb);
/* Link layer. We are guaranteed to have at least the 14 byte Ethernet
* header in the linear data area.
*/
eth = eth_hdr(skb);
ether_addr_copy(key->eth.src, eth->h_source);
ether_addr_copy(key->eth.dst, eth->h_dest);
/* Link layer. */
clear_vlan(key);
if (key->mac_proto == MAC_PROTO_NONE) {
if (unlikely(eth_type_vlan(skb->protocol)))
return -EINVAL;
__skb_pull(skb, 2 * ETH_ALEN);
/* We are going to push all headers that we pull, so no need to
* update skb->csum here.
*/
skb_reset_network_header(skb);
} else {
eth = eth_hdr(skb);
ether_addr_copy(key->eth.src, eth->h_source);
ether_addr_copy(key->eth.dst, eth->h_dest);
if (unlikely(parse_vlan(skb, key)))
return -ENOMEM;
__skb_pull(skb, 2 * ETH_ALEN);
/* We are going to push all headers that we pull, so no need to
* update skb->csum here.
*/
key->eth.type = parse_ethertype(skb);
if (unlikely(key->eth.type == htons(0)))
return -ENOMEM;
if (unlikely(parse_vlan(skb, key)))
return -ENOMEM;
skb->protocol = parse_ethertype(skb);
if (unlikely(skb->protocol == htons(0)))
return -ENOMEM;
skb_reset_network_header(skb);
skb_reset_network_header(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
}
skb_reset_mac_len(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
key->eth.type = skb->protocol;
/* Network layer. */
if (key->eth.type == htons(ETH_P_IP)) {
......@@ -721,9 +734,25 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
return key_extract(skb, key);
}
/* Determine the MAC-layer protocol of @skb from its receiving device.
 *
 * Returns MAC_PROTO_ETHERNET for ARPHRD_ETHER devices and for ARPHRD_NONE
 * devices whose skb->protocol is ETH_P_TEB, MAC_PROTO_NONE for any other
 * packet on an ARPHRD_NONE device, and -EINVAL (after a one-time warning)
 * for every other device type.
 */
static int key_extract_mac_proto(struct sk_buff *skb)
{
	if (skb->dev->type == ARPHRD_ETHER)
		return MAC_PROTO_ETHERNET;

	if (skb->dev->type == ARPHRD_NONE) {
		if (skb->protocol != htons(ETH_P_TEB))
			return MAC_PROTO_NONE;
		return MAC_PROTO_ETHERNET;
	}

	/* Device type not handled by the datapath. */
	WARN_ON_ONCE(1);
	return -EINVAL;
}
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
int res;
/* Extract metadata from packet. */
if (tun_info) {
key->tun_proto = ip_tunnel_info_af(tun_info);
......@@ -751,6 +780,10 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->phy.skb_mark = skb->mark;
ovs_ct_fill_key(skb, key);
key->ovs_flow_hash = 0;
res = key_extract_mac_proto(skb);
if (res < 0)
return res;
key->mac_proto = res;
key->recirc_id = 0;
return key_extract(skb, key);
......@@ -767,5 +800,29 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
if (err)
return err;
if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
/* key_extract assumes that skb->protocol is set-up for
* layer 3 packets which is the case for other callers,
* in particular packets received from the network stack.
* Here the correct value can be set from the metadata
* extracted above.
*/
skb->protocol = key->eth.type;
} else {
struct ethhdr *eth;
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
/* Normally, setting the skb 'protocol' field would be
* handled by a call to eth_type_trans(), but it assumes
* there's a sending device, which we may not have.
*/
if (eth_proto_is_802_3(eth->h_proto))
skb->protocol = eth->h_proto;
else
skb->protocol = htons(ETH_P_802_2);
}
return key_extract(skb, key);
}
......@@ -37,6 +37,12 @@
struct sk_buff;
/* MAC-layer protocol of a packet, kept in sw_flow_key.mac_proto. */
enum sw_flow_mac_proto {
	MAC_PROTO_NONE     = 0,	/* no MAC header (MAC header length 0) */
	MAC_PROTO_ETHERNET = 1,	/* Ethernet header (ETH_HLEN bytes) */
};

/* Flag OR-ed into sw_flow_key.mac_proto to mark the key as invalidated;
 * it shares no bits with the enum values above.
 */
#define SW_FLOW_KEY_INVALID 0x80
/* Store options at the end of the array if they are less than the
* maximum size. This allows us to get the benefits of variable length
* matching for small options.
......@@ -68,6 +74,7 @@ struct sw_flow_key {
u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
} __packed phy; /* Safe when right after 'tun_key'. */
u8 mac_proto; /* MAC layer protocol (e.g. Ethernet). */
u8 tun_proto; /* Protocol of encapsulating tunnel. */
u32 ovs_flow_hash; /* Datapath computed hash value. */
u32 recirc_id; /* Recirculation ID. */
......@@ -206,6 +213,21 @@ struct arp_eth_header {
unsigned char ar_tip[4]; /* target IP address */
} __packed;
/* Return the MAC protocol stored in @key with the SW_FLOW_KEY_INVALID
 * flag masked out.
 */
static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key)
{
	u8 proto = key->mac_proto;

	proto &= ~SW_FLOW_KEY_INVALID;
	return proto;
}
/* Length in bytes of the MAC header implied by @mac_proto: ETH_HLEN for
 * MAC_PROTO_ETHERNET, 0 for anything else.
 */
static inline u16 __ovs_mac_header_len(u8 mac_proto)
{
	if (mac_proto == MAC_PROTO_ETHERNET)
		return ETH_HLEN;

	return 0;
}
static inline u16 ovs_mac_header_len(const struct sw_flow_key *key)
{
return __ovs_mac_header_len(ovs_key_mac_proto(key));
}
static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
{
return sfid->ufid_len;
......
......@@ -123,7 +123,7 @@ static void update_range(struct sw_flow_match *match,
static bool match_validate(const struct sw_flow_match *match,
u64 key_attrs, u64 mask_attrs, bool log)
{
u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
u64 key_expected = 0;
u64 mask_allowed = key_attrs; /* At most allow all key attributes */
/* The following mask attributes allowed only if they
......@@ -969,10 +969,33 @@ static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
return 0;
}
/* Store the OVS_KEY_ATTR_ETHERTYPE attribute into @match as eth.type and
 * clear its bit in @attrs.
 *
 * @match:   match whose key (or mask, when @is_mask) receives the value.
 * @attrs:   bitmap of attributes still to be parsed; updated on success.
 * @a:       attribute table indexed by OVS_KEY_ATTR_*.
 * @is_mask: true when a mask is being parsed; the EtherType is then always
 *           an exact match (0xffff) regardless of the supplied value.
 * @log:     passed to OVS_NLERR to control error logging.
 *
 * Returns 0 on success, or -EINVAL when a key (not mask) EtherType fails
 * the eth_proto_is_802_3() check.
 */
static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
				       u64 *attrs, const struct nlattr **a,
				       bool is_mask, bool log)
{
	__be16 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

	if (!is_mask && !eth_proto_is_802_3(eth_type)) {
		OVS_NLERR(log, "EtherType %x is less than min %x",
			  ntohs(eth_type), ETH_P_802_3_MIN);
		return -EINVAL;
	}

	/* Always exact match EtherType. */
	if (is_mask)
		eth_type = htons(0xffff);

	SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
	*attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);

	return 0;
}
static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
u64 *attrs, const struct nlattr **a,
bool is_mask, bool log)
{
u8 mac_proto = MAC_PROTO_ETHERNET;
if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
......@@ -1059,6 +1082,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
sizeof(*cl), is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
}
/* For layer 3 packets the Ethernet type is provided
* and treated as metadata but no MAC addresses are provided.
*/
if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
(*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
mac_proto = MAC_PROTO_NONE;
/* Always exact match mac_proto */
SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
if (mac_proto == MAC_PROTO_NONE)
return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
log);
return 0;
}
......@@ -1081,33 +1119,26 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
SW_FLOW_KEY_MEMCPY(match, eth.dst,
eth_key->eth_dst, ETH_ALEN, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
}
if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
/* VLAN attribute is always parsed before getting here since it
* may occur multiple times.
*/
OVS_NLERR(log, "VLAN attribute unexpected.");
return -EINVAL;
}
if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
__be16 eth_type;
eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
if (is_mask) {
/* Always exact match EtherType. */
eth_type = htons(0xffff);
} else if (!eth_proto_is_802_3(eth_type)) {
OVS_NLERR(log, "EtherType %x is less than min %x",
ntohs(eth_type), ETH_P_802_3_MIN);
if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
/* VLAN attribute is always parsed before getting here since it
* may occur multiple times.
*/
OVS_NLERR(log, "VLAN attribute unexpected.");
return -EINVAL;
}
SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
} else if (!is_mask) {
SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
log);
if (err)
return err;
} else if (!is_mask) {
SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
}
} else if (!match->key->eth.type) {
OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
return -EINVAL;
}
if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
......@@ -1556,42 +1587,44 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (ovs_ct_put_key(output, skb))
goto nla_put_failure;
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
eth_key = nla_data(nla);
ether_addr_copy(eth_key->eth_src, output->eth.src);
ether_addr_copy(eth_key->eth_dst, output->eth.dst);
if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
if (!swkey->eth.vlan.tci)
goto unencap;
if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
eth_key = nla_data(nla);
ether_addr_copy(eth_key->eth_src, output->eth.src);
ether_addr_copy(eth_key->eth_dst, output->eth.dst);
if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
goto nla_put_failure;
in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
if (!swkey->eth.cvlan.tci)
encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
if (!swkey->eth.vlan.tci)
goto unencap;
if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
goto nla_put_failure;
in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
if (!swkey->eth.cvlan.tci)
goto unencap;
}
}
}
if (swkey->eth.type == htons(ETH_P_802_2)) {
/*
* Ethertype 802.2 is represented in the netlink with omitted
* OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
* 0xffff in the mask attribute. Ethertype can also
* be wildcarded.
*/
if (is_mask && output->eth.type)
if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
output->eth.type))
goto nla_put_failure;
goto unencap;
if (swkey->eth.type == htons(ETH_P_802_2)) {
/*
* Ethertype 802.2 is represented in the netlink with omitted
* OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
* 0xffff in the mask attribute. Ethertype can also
* be wildcarded.
*/
if (is_mask && output->eth.type)
if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
output->eth.type))
goto nla_put_failure;
goto unencap;
}
}
if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
......@@ -2126,8 +2159,8 @@ static bool validate_masked(u8 *data, int len)
static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key,
struct sw_flow_actions **sfa,
bool *skip_copy, __be16 eth_type, bool masked, bool log)
struct sw_flow_actions **sfa, bool *skip_copy,
u8 mac_proto, __be16 eth_type, bool masked, bool log)
{
const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key);
......@@ -2157,9 +2190,12 @@ static int validate_set(const struct nlattr *a,
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
case OVS_KEY_ATTR_ETHERNET:
break;
case OVS_KEY_ATTR_ETHERNET:
if (mac_proto != MAC_PROTO_ETHERNET)
return -EINVAL;
case OVS_KEY_ATTR_TUNNEL:
if (masked)
return -EINVAL; /* Masked tunnel set not supported. */
......@@ -2324,6 +2360,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
int rem, err;
......@@ -2346,6 +2383,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
[OVS_ACTION_ATTR_CT] = (u32)-1,
[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
[OVS_ACTION_ATTR_POP_ETH] = 0,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
......@@ -2394,10 +2433,14 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
}
case OVS_ACTION_ATTR_POP_VLAN:
if (mac_proto != MAC_PROTO_ETHERNET)
return -EINVAL;
vlan_tci = htons(0);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
if (mac_proto != MAC_PROTO_ETHERNET)
return -EINVAL;
vlan = nla_data(a);
if (!eth_type_vlan(vlan->vlan_tpid))
return -EINVAL;
......@@ -2447,14 +2490,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa,
&skip_copy, eth_type, false, log);
&skip_copy, mac_proto, eth_type,
false, log);
if (err)
return err;
break;
case OVS_ACTION_ATTR_SET_MASKED:
err = validate_set(a, key, sfa,
&skip_copy, eth_type, true, log);
&skip_copy, mac_proto, eth_type,
true, log);
if (err)
return err;
break;
......@@ -2474,6 +2519,22 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
skip_copy = true;
break;
case OVS_ACTION_ATTR_PUSH_ETH:
	/* Disallow pushing an Ethernet header if one
	 * is already present */
	if (mac_proto != MAC_PROTO_NONE)
		return -EINVAL;
	/* The packet now has an Ethernet header; validate the
	 * following actions against that, matching what push_eth()
	 * records in the flow key at execution time.
	 */
	mac_proto = MAC_PROTO_ETHERNET;
	break;
case OVS_ACTION_ATTR_POP_ETH:
	if (mac_proto != MAC_PROTO_ETHERNET)
		return -EINVAL;
	/* pop_eth() does not handle VLAN-tagged packets. */
	if (vlan_tci & htons(VLAN_TAG_PRESENT))
		return -EINVAL;
	/* The Ethernet header is gone; the following actions see a
	 * layer 3 packet, matching what pop_eth() records in the
	 * flow key at execution time.
	 */
	mac_proto = MAC_PROTO_NONE;
	break;
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
......
......@@ -57,8 +57,10 @@ static void netdev_port_receive(struct sk_buff *skb)
if (unlikely(!skb))
return;
skb_push(skb, ETH_HLEN);
skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
if (skb->dev->type == ARPHRD_ETHER) {
skb_push(skb, ETH_HLEN);
skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
}
ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
return;
error:
......@@ -97,7 +99,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
}
if (vport->dev->flags & IFF_LOOPBACK ||
vport->dev->type != ARPHRD_ETHER ||
(vport->dev->type != ARPHRD_ETHER &&
vport->dev->type != ARPHRD_NONE) ||
ovs_is_internal_dev(vport->dev)) {
err = -EINVAL;
goto error_put;
......
......@@ -464,9 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
return 0;
}
static unsigned int packet_length(const struct sk_buff *skb)
static unsigned int packet_length(const struct sk_buff *skb,
struct net_device *dev)
{
unsigned int length = skb->len - ETH_HLEN;
unsigned int length = skb->len - dev->hard_header_len;
if (!skb_vlan_tag_present(skb) &&
eth_type_vlan(skb->protocol))
......@@ -480,14 +481,34 @@ static unsigned int packet_length(const struct sk_buff *skb)
return length;
}
void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
{
int mtu = vport->dev->mtu;
if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
switch (vport->dev->type) {
case ARPHRD_NONE:
if (mac_proto == MAC_PROTO_ETHERNET) {
skb_reset_network_header(skb);
skb_reset_mac_len(skb);
skb->protocol = htons(ETH_P_TEB);
} else if (mac_proto != MAC_PROTO_NONE) {
WARN_ON_ONCE(1);
goto drop;
}
break;
case ARPHRD_ETHER:
if (mac_proto != MAC_PROTO_ETHERNET)
goto drop;
break;
default:
goto drop;
}
if (unlikely(packet_length(skb, vport->dev) > mtu &&
!skb_is_gso(skb))) {
net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
vport->dev->name,
packet_length(skb), mtu);
packet_length(skb, vport->dev), mtu);
vport->dev->stats.tx_errors++;
goto drop;
}
......
......@@ -197,6 +197,6 @@ int __ovs_vport_ops_register(struct vport_ops *ops);
})
void ovs_vport_ops_unregister(struct vport_ops *ops);
void ovs_vport_send(struct vport *vport, struct sk_buff *skb);
void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto);
#endif /* vport.h */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment