Commit be7f4578 authored by Jakub Kicinski

Merge branch 'tls-device-offload-for-bond'

Tariq Toukan says:

====================
TLS device offload for Bond

This series enables TX and RX TLS device offload for bond interfaces,
allowing a bond to benefit from the offload capabilities of its lower devices.

We add a new ndo, ndo_sk_get_lower_dev(), used to get the lower device
that corresponds to a given socket.
The TLS module uses it to interact directly with the lowest device in
the chain and invoke the control operations in its tlsdev_ops. This means
that the bond interface does not need its own struct tlsdev_ops instance
or derived logic/callbacks.
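
As a rough sketch (the caller and helper name below are hypothetical; only
netdev_sk_get_lowest_dev() from this series is a real symbol), a TLS-offload
caller holding a connected socket could resolve the device to program like this:

    #include <linux/netdevice.h>
    #include <net/dst.h>
    #include <net/sock.h>

    /* Illustrative only: find the device whose tlsdev_ops should serve 'sk'.
     * netdev_sk_get_lowest_dev() walks ndo_sk_get_lower_dev() down the chain
     * and returns the starting device itself when nothing lower exists.
     */
    static struct net_device *tls_offload_resolve_dev(struct sock *sk)
    {
        struct dst_entry *dst = sk_dst_get(sk);
        struct net_device *lowest = NULL;

        if (dst) {
            lowest = netdev_sk_get_lowest_dev(dst->dev, sk);
            dst_release(dst);
        }
        return lowest;
    }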

To keep tracking of the HW and SW TLS contexts simple, we bind each socket to
a specific lower device for the socket's whole lifetime. This is logically
valid (and similar to the SW kTLS behavior) only in the following bond
configuration, so we restrict the offload support to it:

((mode == balance-xor) or (mode == 802.3ad))
and xmit_hash_policy == layer3+4.
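
This holds because, with layer3+4 hashing, the slave index depends only on the
socket's immutable 5-tuple: the lower device chosen when the TLS context is
installed is the same one every packet of the flow hashes to (as long as the
usable-slave array is unchanged). A minimal userspace illustration of the fold,
mirroring the bond_ip_hash() helper added in this series (stdint stand-ins for
kernel types; the addresses, ports, and slave count are made up):

    #include <stdint.h>
    #include <stdio.h>

    /* Same xor-and-fold as the kernel helper; illustrative only. */
    static uint32_t demo_l34_hash(uint32_t l4_ports, uint32_t saddr, uint32_t daddr)
    {
        uint32_t hash = l4_ports;

        hash ^= daddr ^ saddr;
        hash ^= hash >> 16;
        hash ^= hash >> 8;
        return hash >> 1; /* drop the biased lowest bit */
    }

    int main(void)
    {
        /* Hypothetical flow: 10.0.0.1:36000 -> 10.0.0.2:443, 2 usable slaves */
        uint32_t ports = (36000u << 16) | 443u;
        uint32_t idx = demo_l34_hash(ports, 0x0a000001u, 0x0a000002u) % 2;

        printf("slave index: %u\n", idx); /* identical for every packet */
        return 0;
    }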

In this design, the TLS TX/RX offload feature flags of the bond device are
independent of those of the lower devices. They reflect the current feature
state but are not directly controllable, because the call to
ndo_sk_get_lower_dev() bypasses the bond driver without it knowing who the
caller is. The bond TLS feature flags are therefore set/cleared only according
to the configured mode and xmit_hash_policy.

The bypass applies only to the control flow; packets on the fast path still go
through the bond xmit logic.

This design differs from the xfrm/IPsec offload, where the bond driver has its
own struct xfrmdev_ops instance and callbacks.
====================

Link: https://lore.kernel.org/r/20210117145949.8632-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 41fb4c1b 4e5a7332
@@ -83,6 +83,9 @@
#include <net/bonding.h>
#include <net/bond_3ad.h>
#include <net/bond_alb.h>
#if IS_ENABLED(CONFIG_TLS_DEVICE)
#include <net/tls.h>
#endif
#include "bonding_priv.h"
@@ -301,6 +304,19 @@ netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
return dev_queue_xmit(skb);
}
bool bond_sk_check(struct bonding *bond)
{
switch (BOND_MODE(bond)) {
case BOND_MODE_8023AD:
case BOND_MODE_XOR:
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
return true;
fallthrough;
default:
return false;
}
}
/*---------------------------------- VLAN -----------------------------------*/
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@@ -1212,6 +1228,13 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
netdev_features_t mask;
struct slave *slave;
#if IS_ENABLED(CONFIG_TLS_DEVICE)
if (bond_sk_check(bond))
features |= BOND_TLS_FEATURES;
else
features &= ~BOND_TLS_FEATURES;
#endif
mask = features;
features &= ~NETIF_F_ONE_FOR_ALL;
@@ -3541,6 +3564,16 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
return true;
}
static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
{
hash ^= (__force u32)flow_get_u32_dst(flow) ^
(__force u32)flow_get_u32_src(flow);
hash ^= (hash >> 16);
hash ^= (hash >> 8);
/* discard lowest hash bit to deal with the common even ports pattern */
return hash >> 1;
}
/**
* bond_xmit_hash - generate a hash value based on the xmit policy
* @bond: bonding device
@@ -3571,12 +3604,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
else
memcpy(&hash, &flow.ports.ports, sizeof(hash));
}
-    hash ^= (__force u32)flow_get_u32_dst(&flow) ^
-        (__force u32)flow_get_u32_src(&flow);
-    hash ^= (hash >> 16);
-    hash ^= (hash >> 8);
-
-    return hash >> 1;
+    return bond_ip_hash(hash, &flow);
}
/*-------------------------- Device entry points ----------------------------*/
@@ -4549,6 +4578,95 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
return NULL;
}
static void bond_sk_to_flow(struct sock *sk, struct flow_keys *flow)
{
switch (sk->sk_family) {
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
if (sk->sk_ipv6only ||
ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) {
flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
flow->addrs.v6addrs.src = inet6_sk(sk)->saddr;
flow->addrs.v6addrs.dst = sk->sk_v6_daddr;
break;
}
fallthrough;
#endif
default: /* AF_INET */
flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
flow->addrs.v4addrs.src = inet_sk(sk)->inet_rcv_saddr;
flow->addrs.v4addrs.dst = inet_sk(sk)->inet_daddr;
break;
}
flow->ports.src = inet_sk(sk)->inet_sport;
flow->ports.dst = inet_sk(sk)->inet_dport;
}
/**
* bond_sk_hash_l34 - generate a hash value based on the socket's L3 and L4 fields
* @sk: socket to use for headers
*
* This function will extract the necessary fields from the socket and use
* them to generate a hash based on the LAYER34 xmit_policy.
* Assumes that sk is a TCP or UDP socket.
*/
static u32 bond_sk_hash_l34(struct sock *sk)
{
struct flow_keys flow;
u32 hash;
bond_sk_to_flow(sk, &flow);
/* L4 */
memcpy(&hash, &flow.ports.ports, sizeof(hash));
/* L3 */
return bond_ip_hash(hash, &flow);
}
static struct net_device *__bond_sk_get_lower_dev(struct bonding *bond,
struct sock *sk)
{
struct bond_up_slave *slaves;
struct slave *slave;
unsigned int count;
u32 hash;
slaves = rcu_dereference(bond->usable_slaves);
count = slaves ? READ_ONCE(slaves->count) : 0;
if (unlikely(!count))
return NULL;
hash = bond_sk_hash_l34(sk);
slave = slaves->arr[hash % count];
return slave->dev;
}
static struct net_device *bond_sk_get_lower_dev(struct net_device *dev,
struct sock *sk)
{
struct bonding *bond = netdev_priv(dev);
struct net_device *lower = NULL;
rcu_read_lock();
if (bond_sk_check(bond))
lower = __bond_sk_get_lower_dev(bond, sk);
rcu_read_unlock();
return lower;
}
#if IS_ENABLED(CONFIG_TLS_DEVICE)
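/* The socket was bound to one lower device when its TLS context was
 * installed; if that slave has since left the bond, the skb is dropped
 * rather than sent through a device that lacks the HW crypto state.
 */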
static netdev_tx_t bond_tls_device_xmit(struct bonding *bond, struct sk_buff *skb,
struct net_device *dev)
{
if (likely(bond_get_slave_by_dev(bond, tls_get_ctx(skb->sk)->netdev)))
return bond_dev_queue_xmit(bond, skb, tls_get_ctx(skb->sk)->netdev);
return bond_tx_drop(dev, skb);
}
#endif
static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bonding *bond = netdev_priv(dev);
@@ -4557,6 +4675,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
!bond_slave_override(bond, skb))
return NETDEV_TX_OK;
#if IS_ENABLED(CONFIG_TLS_DEVICE)
if (skb->sk && tls_is_sk_tx_device_offloaded(skb->sk))
return bond_tls_device_xmit(bond, skb, dev);
#endif
switch (BOND_MODE(bond)) {
case BOND_MODE_ROUNDROBIN:
return bond_xmit_roundrobin(skb, dev);
@@ -4685,6 +4808,7 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_fix_features = bond_fix_features,
.ndo_features_check = passthru_features_check,
.ndo_get_xmit_slave = bond_xmit_get_slave,
.ndo_sk_get_lower_dev = bond_sk_get_lower_dev,
};
static const struct device_type bond_type = {
@@ -4756,6 +4880,10 @@ void bond_setup(struct net_device *bond_dev)
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
bond_dev->features |= BOND_XFRM_FEATURES;
#endif /* CONFIG_XFRM_OFFLOAD */
#if IS_ENABLED(CONFIG_TLS_DEVICE)
if (bond_sk_check(bond))
bond_dev->features |= BOND_TLS_FEATURES;
#endif
}
/* Destroy a bonding device.
@@ -745,17 +745,30 @@ const struct bond_option *bond_opt_get(unsigned int option)
return &bond_opts[option];
}
-static void bond_set_xfrm_features(struct net_device *bond_dev, u64 mode)
+static bool bond_set_xfrm_features(struct bonding *bond)
{
    if (!IS_ENABLED(CONFIG_XFRM_OFFLOAD))
-        return;
+        return false;
+
+    if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
+        bond->dev->wanted_features |= BOND_XFRM_FEATURES;
+    else
+        bond->dev->wanted_features &= ~BOND_XFRM_FEATURES;
+
+    return true;
+}
+
+static bool bond_set_tls_features(struct bonding *bond)
+{
+    if (!IS_ENABLED(CONFIG_TLS_DEVICE))
+        return false;

-    if (mode == BOND_MODE_ACTIVEBACKUP)
-        bond_dev->wanted_features |= BOND_XFRM_FEATURES;
+    if (bond_sk_check(bond))
+        bond->dev->wanted_features |= BOND_TLS_FEATURES;
    else
-        bond_dev->wanted_features &= ~BOND_XFRM_FEATURES;
+        bond->dev->wanted_features &= ~BOND_TLS_FEATURES;

-    netdev_update_features(bond_dev);
+    return true;
}
static int bond_option_mode_set(struct bonding *bond,
@@ -780,13 +793,20 @@ static int bond_option_mode_set(struct bonding *bond,
if (newval->value == BOND_MODE_ALB)
bond->params.tlb_dynamic_lb = 1;
-    if (bond->dev->reg_state == NETREG_REGISTERED)
-        bond_set_xfrm_features(bond->dev, newval->value);
    /* don't cache arp_validate between modes */
    bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
    bond->params.mode = newval->value;
+    if (bond->dev->reg_state == NETREG_REGISTERED) {
+        bool update = false;
+
+        update |= bond_set_xfrm_features(bond);
+        update |= bond_set_tls_features(bond);
+
+        if (update)
+            netdev_update_features(bond->dev);
+    }
return 0;
}
@@ -1219,6 +1239,10 @@ static int bond_option_xmit_hash_policy_set(struct bonding *bond,
newval->string, newval->value);
bond->params.xmit_policy = newval->value;
if (bond->dev->reg_state == NETREG_REGISTERED)
if (bond_set_tls_features(bond))
netdev_update_features(bond->dev);
return 0;
}
@@ -1398,6 +1398,8 @@ struct net_device_ops {
struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev,
struct sk_buff *skb,
bool all_slaves);
struct net_device* (*ndo_sk_get_lower_dev)(struct net_device *dev,
struct sock *sk);
netdev_features_t (*ndo_fix_features)(struct net_device *dev,
netdev_features_t features);
int (*ndo_set_features)(struct net_device *dev,
@@ -2858,6 +2860,8 @@ int init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
struct sk_buff *skb,
bool all_slaves);
struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
struct sock *sk);
struct net_device *dev_get_by_index(struct net *net, int ifindex);
struct net_device *__dev_get_by_index(struct net *net, int ifindex);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
@@ -89,6 +89,8 @@
#define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \
NETIF_F_GSO_ESP)
#define BOND_TLS_FEATURES (NETIF_F_HW_TLS_TX | NETIF_F_HW_TLS_RX)
#ifdef CONFIG_NET_POLL_CONTROLLER
extern atomic_t netpoll_block_tx;
@@ -265,6 +267,8 @@ struct bond_vlan_tag {
unsigned short vlan_id;
};
bool bond_sk_check(struct bonding *bond);
/**
* Returns NULL if the net_device does not belong to any of the bond's slaves
*
@@ -8105,6 +8105,39 @@ struct net_device *netdev_get_xmit_slave(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_get_xmit_slave);
static struct net_device *netdev_sk_get_lower_dev(struct net_device *dev,
struct sock *sk)
{
const struct net_device_ops *ops = dev->netdev_ops;
if (!ops->ndo_sk_get_lower_dev)
return NULL;
return ops->ndo_sk_get_lower_dev(dev, sk);
}
/**
* netdev_sk_get_lowest_dev - Get the lowest device in the chain for a given device and socket
* @dev: device
* @sk: the socket
*
* %NULL is returned if no lower device is found.
*/
struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
struct sock *sk)
{
struct net_device *lower;
lower = netdev_sk_get_lower_dev(dev, sk);
while (lower) {
dev = lower;
lower = netdev_sk_get_lower_dev(dev, sk);
}
return dev;
}
EXPORT_SYMBOL(netdev_sk_get_lowest_dev);
static void netdev_adjacent_add_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
@@ -113,7 +113,7 @@ static struct net_device *get_netdev_for_sock(struct sock *sk)
struct net_device *netdev = NULL;
if (likely(dst)) {
-        netdev = dst->dev;
+        netdev = netdev_sk_get_lowest_dev(dst->dev, sk);
dev_hold(netdev);
}
......@@ -1329,6 +1329,8 @@ static int tls_dev_event(struct notifier_block *this, unsigned long event,
switch (event) {
case NETDEV_REGISTER:
case NETDEV_FEAT_CHANGE:
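/* A bond master advertises NETIF_F_HW_TLS_* without tlsdev_ops of its
 * own (offload is delegated to its lower devices), so the ops checks
 * below do not apply to it.
 */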
if (netif_is_bond_master(dev))
return NOTIFY_DONE;
if ((dev->features & NETIF_F_HW_TLS_RX) &&
!dev->tlsdev_ops->tls_dev_resync)
return NOTIFY_BAD;
@@ -424,7 +424,7 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
struct net_device *dev,
struct sk_buff *skb)
{
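/* A bond master passes validation here: its xmit path forwards the skb
 * to the slave recorded in the socket's TLS context, or drops it, so no
 * SW fallback is needed at this level.
 */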
-    if (dev == tls_get_ctx(sk)->netdev)
+    if (dev == tls_get_ctx(sk)->netdev || netif_is_bond_master(dev))
return skb;
return tls_sw_fallback(sk, skb);