Commit bd1903b7 authored by Tonghao Zhang, committed by David S. Miller

net: openvswitch: add hash info to upcall

When using the kernel datapath, the upcall does not
include the skb hash information. This causes
problems, because the skb hash is important
in the kernel stack. For example, the VXLAN module uses
it to select the UDP source port. Tx queue selection
may also use the hash.

The hash is computed in different ways: it is random
for a TCP socket, and it may also be computed in hardware
or in the software stack. Recalculating the hash is not easy.

Hash of TCP socket is computed:
tcp_v4_connect
    -> sk_set_txhash (is random)

__tcp_transmit_skb
    -> skb_set_hash_from_sk

For the first packet of a TCP session there will be one
upcall to ovs-vswitchd, and it carries no skb hash
information. The remaining packets are processed in the
Open vSwitch kernel module with the hash kept. If this TCP
session is forwarded to the VXLAN module, the UDP source port
of the first TCP packet differs from that of the remaining packets.

TCP packets may come to Open vSwitch from the host or from Docker
containers. To fix this, we store the hash info in the upcall and
restore the hash when the packet is sent back.

+---------------+          +-------------------------+
|   Docker/VMs  |          |     ovs-vswitchd        |
+----+----------+          +-+--------------------+--+
     |                       ^                    |
     |                       |                    |
     |                       |  upcall            v restore packet hash (not recalculate)
     |                     +-+--------------------+--+
     |  tap netdev         |                         |   vxlan module
     +--------------->     +-->  Open vSwitch ko     +-->
       or internal type    |                         |
                           +-------------------------+

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 839554b7
...@@ -173,6 +173,7 @@ enum ovs_packet_cmd { ...@@ -173,6 +173,7 @@ enum ovs_packet_cmd {
* @OVS_PACKET_ATTR_LEN: Packet size before truncation. * @OVS_PACKET_ATTR_LEN: Packet size before truncation.
* %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment * %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment
* size. * size.
* @OVS_PACKET_ATTR_HASH: Packet hash info (e.g. hash, sw_hash and l4_hash in skb).
* *
* These attributes follow the &struct ovs_header within the Generic Netlink * These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_PACKET_* commands. * payload for %OVS_PACKET_* commands.
...@@ -190,7 +191,8 @@ enum ovs_packet_attr { ...@@ -190,7 +191,8 @@ enum ovs_packet_attr {
OVS_PACKET_ATTR_PROBE, /* Packet operation is a feature probe, OVS_PACKET_ATTR_PROBE, /* Packet operation is a feature probe,
error logging should be suppressed. */ error logging should be suppressed. */
OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */ OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */
OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */ OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */
OVS_PACKET_ATTR_HASH, /* Packet hash. */
__OVS_PACKET_ATTR_MAX __OVS_PACKET_ATTR_MAX
}; };
......
...@@ -350,7 +350,8 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, ...@@ -350,7 +350,8 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */ + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
+ nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */ + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
/* OVS_PACKET_ATTR_USERDATA */ /* OVS_PACKET_ATTR_USERDATA */
if (upcall_info->userdata) if (upcall_info->userdata)
...@@ -393,6 +394,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, ...@@ -393,6 +394,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
size_t len; size_t len;
unsigned int hlen; unsigned int hlen;
int err, dp_ifindex; int err, dp_ifindex;
u64 hash;
dp_ifindex = get_dpifindex(dp); dp_ifindex = get_dpifindex(dp);
if (!dp_ifindex) if (!dp_ifindex)
...@@ -504,6 +506,19 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, ...@@ -504,6 +506,19 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
pad_packet(dp, user_skb); pad_packet(dp, user_skb);
} }
/* Add OVS_PACKET_ATTR_HASH */
hash = skb_get_hash_raw(skb);
if (skb->sw_hash)
hash |= OVS_PACKET_HASH_SW_BIT;
if (skb->l4_hash)
hash |= OVS_PACKET_HASH_L4_BIT;
if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
err = -ENOBUFS;
goto out;
}
/* Only reserve room for attribute header, packet data is added /* Only reserve room for attribute header, packet data is added
* in skb_zerocopy() */ * in skb_zerocopy() */
if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
...@@ -543,6 +558,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) ...@@ -543,6 +558,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp; struct datapath *dp;
struct vport *input_vport; struct vport *input_vport;
u16 mru = 0; u16 mru = 0;
u64 hash;
int len; int len;
int err; int err;
bool log = !a[OVS_PACKET_ATTR_PROBE]; bool log = !a[OVS_PACKET_ATTR_PROBE];
...@@ -568,6 +584,14 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) ...@@ -568,6 +584,14 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
} }
OVS_CB(packet)->mru = mru; OVS_CB(packet)->mru = mru;
if (a[OVS_PACKET_ATTR_HASH]) {
hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
!!(hash & OVS_PACKET_HASH_SW_BIT),
!!(hash & OVS_PACKET_HASH_L4_BIT));
}
/* Build an sw_flow for sending this packet. */ /* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc(); flow = ovs_flow_alloc();
err = PTR_ERR(flow); err = PTR_ERR(flow);
......
...@@ -139,6 +139,18 @@ struct ovs_net { ...@@ -139,6 +139,18 @@ struct ovs_net {
bool xt_label; bool xt_label;
}; };
/**
 * enum ovs_pkt_hash_types - flag bits that travel with a packet hash
 * between the datapath and userspace.
 *
 * The hash is exchanged as a single 64-bit value: the low 32 bits hold
 * the hash itself and the flags below sit above them, so they never
 * overlap the hash value.
 *
 * @OVS_PACKET_HASH_SW_BIT: the hash was computed by the software stack.
 * @OVS_PACKET_HASH_L4_BIT: the hash is a canonical 4-tuple hash over
 *                          the transport ports.
 */
enum ovs_pkt_hash_types {
	OVS_PACKET_HASH_SW_BIT = 1ULL << 32,	/* bit 32 */
	OVS_PACKET_HASH_L4_BIT = 1ULL << 33,	/* bit 33 */
};
extern unsigned int ovs_net_id; extern unsigned int ovs_net_id;
void ovs_lock(void); void ovs_lock(void);
void ovs_unlock(void); void ovs_unlock(void);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment