Commit e7164313 authored by David S. Miller

Merge tag 'mlx5-updates-2018-12-19' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2018-12-19

This series adds miscellaneous updates and support for TC offload of
tunnels over VLAN devices.

From Miroslav Lichvar, patches #1,2
1) Update timecounter at least twice per counter overflow
2) Extend PTP gettime function to read system clock

From Gavi Teitz, patch #3
3) Increase VF representors' SQ size to 128

From Eli Britstein and Or Gerlitz, patches #4-10
4) Add support for tunnels over VLAN devices.

Patch 4 avoids a crash for TC flows with egress upper devices.

Patch 5 refactors the tunnel routing device lookup into a helper function.

Patch 6 avoids a crash for TC encap flows with a VLAN on the underlay.

Patches 7-8 refactor the encap tunnel header preparation code.

Patch 9 adds support for building a VLAN-tagged ETH header.

Patch 10 adds support for tunnel routing to a VLAN device.

From Aviv, patches #11,12, fixing the earlier VF LAG series
5) Fix query_nic_sys_image_guid() error during init
6) Fix LAG requirement when CONFIG_MLX5_ESWITCH is off
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents c337680f a6491744
@@ -6,16 +6,50 @@
#include "lib/vxlan.h"
#include "en/tc_tun.h"
static int get_route_and_out_devs(struct mlx5e_priv *priv,
struct net_device *dev,
struct net_device **route_dev,
struct net_device **out_dev)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct net_device *uplink_dev, *uplink_upper;
bool dst_is_lag_dev;
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
uplink_upper = netdev_master_upper_dev_get(uplink_dev);
dst_is_lag_dev = (uplink_upper &&
netif_is_lag_master(uplink_upper) &&
dev == uplink_upper &&
mlx5_lag_is_sriov(priv->mdev));
/* if the egress device isn't on the same HW e-switch or
* it's a LAG device, use the uplink
*/
if (!switchdev_port_same_parent_id(priv->netdev, dev) ||
dst_is_lag_dev) {
*route_dev = uplink_dev;
*out_dev = *route_dev;
} else {
*route_dev = dev;
if (is_vlan_dev(*route_dev))
*out_dev = uplink_dev;
else if (mlx5e_eswitch_rep(dev))
*out_dev = *route_dev;
else
return -EOPNOTSUPP;
}
return 0;
}
static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct net_device **out_dev,
struct net_device **route_dev,
struct flowi4 *fl4,
struct neighbour **out_n,
u8 *out_ttl)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct net_device *uplink_dev, *uplink_upper;
bool dst_is_lag_dev;
struct rtable *rt;
struct neighbour *n = NULL;
@@ -30,21 +64,9 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
#endif
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
uplink_upper = netdev_master_upper_dev_get(uplink_dev);
dst_is_lag_dev = (uplink_upper &&
netif_is_lag_master(uplink_upper) &&
rt->dst.dev == uplink_upper &&
mlx5_lag_is_sriov(priv->mdev));
/* if the egress device isn't on the same HW e-switch or
* it's a LAG device, use the uplink
*/
if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev) ||
dst_is_lag_dev)
*out_dev = uplink_dev;
else
*out_dev = rt->dst.dev;
ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
if (ret < 0)
return ret;
if (!(*out_ttl))
*out_ttl = ip4_dst_hoplimit(&rt->dst);
@@ -68,6 +90,7 @@ static const char *mlx5e_netdev_kind(struct net_device *dev)
static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct net_device **out_dev,
struct net_device **route_dev,
struct flowi6 *fl6,
struct neighbour **out_n,
u8 *out_ttl)
@@ -76,9 +99,6 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
struct dst_entry *dst;
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct net_device *uplink_dev, *uplink_upper;
bool dst_is_lag_dev;
int ret;
ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
@@ -89,21 +109,9 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
if (!(*out_ttl))
*out_ttl = ip6_dst_hoplimit(dst);
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
uplink_upper = netdev_master_upper_dev_get(uplink_dev);
dst_is_lag_dev = (uplink_upper &&
netif_is_lag_master(uplink_upper) &&
dst->dev == uplink_upper &&
mlx5_lag_is_sriov(priv->mdev));
/* if the egress device isn't on the same HW e-switch or
* it's a LAG device, use the uplink
*/
if (!switchdev_port_same_parent_id(priv->netdev, dst->dev) ||
dst_is_lag_dev)
*out_dev = uplink_dev;
else
*out_dev = dst->dev;
ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
if (ret < 0)
return ret;
#else
return -EOPNOTSUPP;
#endif
@@ -176,24 +184,61 @@ static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
return err;
}
static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
struct mlx5e_encap_entry *e,
u16 proto)
{
struct ethhdr *eth = (struct ethhdr *)buf;
char *ip;
ether_addr_copy(eth->h_dest, e->h_dest);
ether_addr_copy(eth->h_source, dev->dev_addr);
if (is_vlan_dev(dev)) {
struct vlan_hdr *vlan = (struct vlan_hdr *)
((char *)eth + ETH_HLEN);
ip = (char *)vlan + VLAN_HLEN;
eth->h_proto = vlan_dev_vlan_proto(dev);
vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
vlan->h_vlan_encapsulated_proto = htons(proto);
} else {
eth->h_proto = htons(proto);
ip = (char *)eth + ETH_HLEN;
}
return ip;
}
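gen_eth_tnl_hdr() is the heart of patch 9: for a VLAN route device it puts the TPID into eth->h_proto, inserts a 4-byte VLAN header, and returns a pointer past it, so the IP header starts VLAN_HLEN bytes later (which is also why the encap size below grows from ETH_HLEN to VLAN_ETH_HLEN). A minimal user-space sketch of the same layout logic, with illustrative stand-ins (build_eth(), a local struct vlan_hdr, a hard-coded 802.1Q TPID where the driver uses vlan_dev_vlan_proto()), not the driver code:

```c
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>        /* htons() */
#include <linux/if_ether.h>   /* struct ethhdr, ETH_HLEN, ETH_P_IP, ETH_P_8021Q */

#define VLAN_HLEN 4

struct vlan_hdr {             /* mirrors the kernel's struct vlan_hdr */
	uint16_t h_vlan_TCI;
	uint16_t h_vlan_encapsulated_proto;
};

/* Returns where the IP header starts, like gen_eth_tnl_hdr() does. */
static char *build_eth(char *buf, int tagged, uint16_t vid, uint16_t proto)
{
	struct ethhdr *eth = (struct ethhdr *)buf;

	if (tagged) {
		struct vlan_hdr *vlan = (struct vlan_hdr *)(buf + ETH_HLEN);

		eth->h_proto = htons(ETH_P_8021Q);              /* TPID */
		vlan->h_vlan_TCI = htons(vid);
		vlan->h_vlan_encapsulated_proto = htons(proto); /* inner proto */
		return (char *)vlan + VLAN_HLEN;
	}
	eth->h_proto = htons(proto);
	return buf + ETH_HLEN;
}

int main(void)
{
	char buf[64] = { 0 };

	printf("untagged: IP at offset %ld\n", (long)(build_eth(buf, 0, 0, ETH_P_IP) - buf));
	printf("tagged:   IP at offset %ld\n", (long)(build_eth(buf, 1, 10, ETH_P_IP) - buf));
	return 0;
}
```

This prints offsets 14 (ETH_HLEN) and 18 (VLAN_ETH_HLEN), matching the two ipv4/ipv6 encap-size branches below.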
int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e)
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
int ipv4_encap_size = ETH_HLEN +
sizeof(struct iphdr) +
e->tunnel_hlen;
struct ip_tunnel_key *tun_key = &e->tun_info.key;
struct net_device *out_dev;
struct net_device *out_dev, *route_dev;
struct neighbour *n = NULL;
struct flowi4 fl4 = {};
int ipv4_encap_size;
char *encap_header;
struct ethhdr *eth;
u8 nud_state, ttl;
struct iphdr *ip;
int err;
/* add the IP fields */
fl4.flowi4_tos = tun_key->tos;
fl4.daddr = tun_key->u.ipv4.dst;
fl4.saddr = tun_key->u.ipv4.src;
ttl = tun_key->ttl;
err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev,
&fl4, &n, &ttl);
if (err)
return err;
ipv4_encap_size =
(is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct iphdr) +
e->tunnel_hlen;
if (max_encap_size < ipv4_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
ipv4_encap_size, max_encap_size);
@@ -204,17 +249,6 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
if (!encap_header)
return -ENOMEM;
/* add the IP fields */
fl4.flowi4_tos = tun_key->tos;
fl4.daddr = tun_key->u.ipv4.dst;
fl4.saddr = tun_key->u.ipv4.src;
ttl = tun_key->ttl;
err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
&fl4, &n, &ttl);
if (err)
goto free_encap;
/* used by mlx5e_detach_encap to look up the entry in the neigh
 * hash table when a user deletes a rule
 */
@@ -238,13 +272,10 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
read_unlock_bh(&n->lock);
/* add ethernet header */
eth = (struct ethhdr *)encap_header;
ether_addr_copy(eth->h_dest, e->h_dest);
ether_addr_copy(eth->h_source, out_dev->dev_addr);
eth->h_proto = htons(ETH_P_IP);
ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
ETH_P_IP);
/* add ip header */
ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
ip->tos = tun_key->tos;
ip->version = 0x4;
ip->ihl = 0x5;
@@ -295,19 +326,32 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
int ipv6_encap_size = ETH_HLEN +
sizeof(struct ipv6hdr) +
e->tunnel_hlen;
struct ip_tunnel_key *tun_key = &e->tun_info.key;
struct net_device *out_dev;
struct net_device *out_dev, *route_dev;
struct neighbour *n = NULL;
struct flowi6 fl6 = {};
struct ipv6hdr *ip6h;
int ipv6_encap_size;
char *encap_header;
struct ethhdr *eth;
u8 nud_state, ttl;
int err;
ttl = tun_key->ttl;
fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
fl6.daddr = tun_key->u.ipv6.dst;
fl6.saddr = tun_key->u.ipv6.src;
err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev,
&fl6, &n, &ttl);
if (err)
return err;
ipv6_encap_size =
(is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct ipv6hdr) +
e->tunnel_hlen;
if (max_encap_size < ipv6_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
ipv6_encap_size, max_encap_size);
@@ -318,17 +362,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
if (!encap_header)
return -ENOMEM;
ttl = tun_key->ttl;
fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
fl6.daddr = tun_key->u.ipv6.dst;
fl6.saddr = tun_key->u.ipv6.src;
err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
&fl6, &n, &ttl);
if (err)
goto free_encap;
/* used by mlx5e_detach_encap to look up the entry in the neigh
 * hash table when a user deletes a rule
 */
@@ -352,13 +385,10 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
read_unlock_bh(&n->lock);
/* add ethernet header */
eth = (struct ethhdr *)encap_header;
ether_addr_copy(eth->h_dest, e->h_dest);
ether_addr_copy(eth->h_source, out_dev->dev_addr);
eth->h_proto = htons(ETH_P_IPV6);
ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
ETH_P_IPV6);
/* add ip header */
ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
ip6_flow_hdr(ip6h, tun_key->tos, 0);
/* the HW fills up ipv6 payload len */
ip6h->hop_limit = ttl;
@@ -45,8 +45,8 @@
#include "en/tc_tun.h"
#include "fs_core.h"
#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \
max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
@@ -1317,6 +1317,15 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
.ndo_get_vf_stats = mlx5e_get_vf_stats,
};
bool mlx5e_eswitch_rep(struct net_device *netdev)
{
if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep ||
netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep)
return true;
return false;
}
static void mlx5e_build_rep_params(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -1337,7 +1346,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
if (rep->vport == FDB_UPLINK_VPORT)
params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
else
params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE;
/* RQ */
mlx5e_build_rq_params(mdev, params);
@@ -1382,7 +1391,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
netdev->hw_features |= NETIF_F_HW_TC;
netdev->hw_features |= NETIF_F_SG;
@@ -1393,6 +1402,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_TSO6;
netdev->hw_features |= NETIF_F_RXCSUM;
if (rep->vport != FDB_UPLINK_VPORT)
netdev->features |= NETIF_F_VLAN_CHALLENGED;
netdev->features |= netdev->hw_features;
}
@@ -176,6 +176,9 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
bool mlx5e_eswitch_rep(struct net_device *netdev);
#else /* CONFIG_MLX5_ESWITCH */
static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
@@ -2584,6 +2584,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
uplink_upper == out_dev)
out_dev = uplink_dev;
if (!mlx5e_eswitch_rep(out_dev))
return -EOPNOTSUPP;
out_priv = netdev_priv(out_dev);
rpriv = out_priv->ppriv;
attr->dests[attr->out_count].rep = rpriv->rep;
@@ -291,12 +291,15 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
if (ldev->pf[0].dev &&
ldev->pf[1].dev &&
mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev))
return true;
else
if (!ldev->pf[0].dev || !ldev->pf[1].dev)
return false;
#ifdef CONFIG_MLX5_ESWITCH
return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
#else
return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) &&
!mlx5_sriov_is_enabled(ldev->pf[1].dev));
#endif
}
static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
@@ -72,7 +72,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
clock);
return mlx5_read_internal_timer(mdev) & cc->mask;
return mlx5_read_internal_timer(mdev, NULL) & cc->mask;
}
static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
@@ -156,15 +156,19 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
return 0;
}
static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
struct ptp_system_timestamp *sts)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
ptp_info);
u64 ns;
struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
clock);
unsigned long flags;
u64 cycles, ns;
write_seqlock_irqsave(&clock->lock, flags);
ns = timecounter_read(&clock->tc);
cycles = mlx5_read_internal_timer(mdev, sts);
ns = timecounter_cyc2time(&clock->tc, cycles);
write_sequnlock_irqrestore(&clock->lock, flags);
*ts = ns_to_timespec64(ns);
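The gettimex64 variant brackets the device read with system-clock timestamps (filled in by mlx5_read_internal_timer() further down via ptp_read_system_prets()/ptp_read_system_postts()). The PTP_SYS_OFFSET_EXTENDED ioctl exposes these pre/post pairs to user space, letting tools like phc2sys bound the PHC-to-system offset far more tightly than sampling around a plain gettime64 call. A rough sketch of the arithmetic a consumer might perform on one sample (names and values are illustrative, not a kernel API):

```c
#include <stdio.h>
#include <stdint.h>

/* sys_pre/sys_post are the system timestamps taken just before and after
 * the PHC register read; phc is the device time read in between. The
 * midpoint estimate assumes the read sits in the middle of the window;
 * (sys_post - sys_pre) bounds the error of that assumption.
 */
static int64_t phc_offset_ns(int64_t sys_pre_ns, int64_t sys_post_ns,
			     int64_t phc_ns)
{
	int64_t midpoint_ns = sys_pre_ns + (sys_post_ns - sys_pre_ns) / 2;

	return phc_ns - midpoint_ns;	/* > 0: PHC ahead of the system clock */
}

int main(void)
{
	/* Made-up sample: a 2 us read window, PHC reporting 500 ns ahead. */
	printf("offset = %lld ns\n",
	       (long long)phc_offset_ns(1000000, 1002000, 1001500));
	return 0;
}
```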
@@ -307,7 +311,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
ts.tv_sec = rq->perout.start.sec;
ts.tv_nsec = rq->perout.start.nsec;
ns = timespec64_to_ns(&ts);
cycles_now = mlx5_read_internal_timer(mdev);
cycles_now = mlx5_read_internal_timer(mdev, NULL);
write_seqlock_irqsave(&clock->lock, flags);
nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
nsec_delta = ns - nsec_now;
@@ -384,7 +388,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = {
.pps = 0,
.adjfreq = mlx5_ptp_adjfreq,
.adjtime = mlx5_ptp_adjtime,
.gettime64 = mlx5_ptp_gettime,
.gettimex64 = mlx5_ptp_gettimex,
.settime64 = mlx5_ptp_settime,
.enable = NULL,
.verify = NULL,
@@ -469,8 +473,8 @@ static int mlx5_pps_event(struct notifier_block *nb,
ptp_clock_event(clock->ptp, &ptp_event);
break;
case PTP_PF_PEROUT:
mlx5_ptp_gettime(&clock->ptp_info, &ts);
cycles_now = mlx5_read_internal_timer(mdev);
mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL);
cycles_now = mlx5_read_internal_timer(mdev, NULL);
ts.tv_sec += 1;
ts.tv_nsec = 0;
ns = timespec64_to_ns(&ts);
@@ -517,14 +521,14 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
ktime_to_ns(ktime_get_real()));
/* Calculate period in seconds to call the overflow watchdog - to make
* sure counter is checked at least once every wrap around.
* sure counter is checked at least twice every wrap around.
* The period is calculated as the minimum between max HW cycles count
* (The clock source mask) and max amount of cycles that can be
* multiplied by clock multiplier where the result doesn't exceed
* 64bits.
*/
overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult);
overflow_cycles = min(overflow_cycles, clock->cycles.mask >> 1);
overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3));
ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles,
frac, &frac);
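On the overflow change in patch 1: timecounter_read() must run often enough that the free-running cyclecounter never wraps unnoticed between two reads. Scheduling the watchdog every mask/3 cycles instead of mask/2 leaves slack for delayed-work scheduling latency while still guaranteeing at least two reads per wrap period. A back-of-the-envelope check with hypothetical numbers (not mlx5's actual mask or clock frequency):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t mask = (1ULL << 48) - 1;	/* hypothetical 48-bit cycle mask */
	double freq_hz = 1e9;			/* hypothetical 1 GHz counter */
	double wrap_s = (double)(mask + 1) / freq_hz;	/* full wrap period */
	double period_s = (double)(mask / 3) / freq_hz;	/* watchdog period */

	printf("wrap every %.1f h, watchdog every %.1f h -> %.1f reads per wrap\n",
	       wrap_s / 3600.0, period_s / 3600.0, wrap_s / period_s);
	return 0;
}
```

With these numbers the counter wraps every ~78 hours and is read every ~26 hours, so even a watchdog run delayed by nearly a full period still lands twice inside one wrap.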
@@ -580,15 +580,22 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
struct ptp_system_timestamp *sts)
{
u32 timer_h, timer_h1, timer_l;
timer_h = ioread32be(&dev->iseg->internal_timer_h);
ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
ptp_read_system_postts(sts);
timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
if (timer_h != timer_h1) /* wrap around */
if (timer_h != timer_h1) {
/* wrap around */
ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
ptp_read_system_postts(sts);
}
return (u64)timer_l | (u64)timer_h1 << 32;
}
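The high/low/high sequence above is the standard lock-free way to read a 64-bit counter exposed as two 32-bit registers: if the high word changed while the low word was read, the low word rolled over and must be re-read so it pairs with the second high word. Note the driver takes the system pre/post timestamps around the low-word read only, since that is the access paired with the system clock for gettimex64. A generic sketch of the pattern (read_hi()/read_lo() are hypothetical register accessors, not mlx5 APIs):

```c
#include <stdint.h>

extern uint32_t read_hi(void);	/* hypothetical: upper 32 bits of the counter */
extern uint32_t read_lo(void);	/* hypothetical: lower 32 bits of the counter */

static uint64_t read_split_counter(void)
{
	uint32_t hi = read_hi();
	uint32_t lo = read_lo();
	uint32_t hi2 = read_hi();

	if (hi != hi2)		/* low word wrapped between the two reads */
		lo = read_lo();

	return ((uint64_t)hi2 << 32) | lo;
}
```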
@@ -38,6 +38,7 @@
#include <linux/sched.h>
#include <linux/if_link.h>
#include <linux/firmware.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/fs.h>
@@ -121,7 +122,8 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 element_id);
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
struct ptp_system_timestamp *sts);
void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
@@ -1204,9 +1204,19 @@ EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
{
if (!mdev->sys_image_guid)
mlx5_query_nic_vport_system_image_guid(mdev, &mdev->sys_image_guid);
int port_type_cap = MLX5_CAP_GEN(mdev, port_type);
u64 tmp = 0;
return mdev->sys_image_guid;
if (mdev->sys_image_guid)
return mdev->sys_image_guid;
if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH)
mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
else
mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
mdev->sys_image_guid = tmp;
return tmp;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);