Commit 7d9e5f42 authored by Wei Wang, committed by David S. Miller

ipv6: convert major tx path to use RT6_LOOKUP_F_DST_NOREF

For the tx path, in most cases we still have to take a refcnt on the dst
because the caller is caching the dst somewhere. But it is still
beneficial to make use of the RT6_LOOKUP_F_DST_NOREF flag while doing the
route lookup, because this flag prevents manipulating the refcnt on
net->ipv6.ip6_null_entry when fib6_rule_lookup() traverses each
routing table. The null_entry is a shared object, and constant updates to
it cause false sharing.

We converted the current major lookup function ip6_route_output_flags()
to make use of RT6_LOOKUP_F_DST_NOREF.

Together with the change in the rx path, we see a noticeable performance
boost:
I ran synflood tests between 2 hosts under the same switch. Both hosts
have 20G mlx NIC, and 8 tx/rx queues.
Sender sends pure SYN flood with random src IPs and ports using trafgen.
Receiver has a simple TCP listener on the target port.
Both hosts have multiple custom rules:
- For incoming packets, only local table is traversed.
- For outgoing packets, 3 tables are traversed to find the route.
The packet processing rate on the receiver is as follows:
- Before the fix: 3.78Mpps
- After the fix:  5.50Mpps
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 67f415dd
...@@ -1072,12 +1072,14 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, ...@@ -1072,12 +1072,14 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
/* send to link-local or multicast address via interface enslaved to /* send to link-local or multicast address via interface enslaved to
* VRF device. Force lookup to VRF table without changing flow struct * VRF device. Force lookup to VRF table without changing flow struct
* Note: Caller to this function must hold rcu_read_lock() and no refcnt
* is taken on the dst by this function.
*/ */
static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
struct flowi6 *fl6) struct flowi6 *fl6)
{ {
struct net *net = dev_net(dev); struct net *net = dev_net(dev);
int flags = RT6_LOOKUP_F_IFACE; int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_DST_NOREF;
struct dst_entry *dst = NULL; struct dst_entry *dst = NULL;
struct rt6_info *rt; struct rt6_info *rt;
...@@ -1087,7 +1089,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, ...@@ -1087,7 +1089,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
*/ */
if (fl6->flowi6_oif == dev->ifindex) { if (fl6->flowi6_oif == dev->ifindex) {
dst = &net->ipv6.ip6_null_entry->dst; dst = &net->ipv6.ip6_null_entry->dst;
dst_hold(dst);
return dst; return dst;
} }
......
...@@ -84,6 +84,10 @@ struct dst_entry *ip6_route_input_lookup(struct net *net, ...@@ -84,6 +84,10 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
struct flowi6 *fl6, struct flowi6 *fl6,
const struct sk_buff *skb, int flags); const struct sk_buff *skb, int flags);
struct dst_entry *ip6_route_output_flags_noref(struct net *net,
const struct sock *sk,
struct flowi6 *fl6, int flags);
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags); struct flowi6 *fl6, int flags);
......
...@@ -2415,8 +2415,9 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, ...@@ -2415,8 +2415,9 @@ static struct rt6_info *ip6_pol_route_output(struct net *net,
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags); return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
} }
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct dst_entry *ip6_route_output_flags_noref(struct net *net,
struct flowi6 *fl6, int flags) const struct sock *sk,
struct flowi6 *fl6, int flags)
{ {
bool any_src; bool any_src;
...@@ -2424,6 +2425,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, ...@@ -2424,6 +2425,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) { (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
struct dst_entry *dst; struct dst_entry *dst;
/* This function does not take refcnt on the dst */
dst = l3mdev_link_scope_lookup(net, fl6); dst = l3mdev_link_scope_lookup(net, fl6);
if (dst) if (dst)
return dst; return dst;
...@@ -2431,6 +2433,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, ...@@ -2431,6 +2433,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
fl6->flowi6_iif = LOOPBACK_IFINDEX; fl6->flowi6_iif = LOOPBACK_IFINDEX;
flags |= RT6_LOOKUP_F_DST_NOREF;
any_src = ipv6_addr_any(&fl6->saddr); any_src = ipv6_addr_any(&fl6->saddr);
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
(fl6->flowi6_oif && any_src)) (fl6->flowi6_oif && any_src))
...@@ -2443,6 +2446,28 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, ...@@ -2443,6 +2446,28 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
} }
EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
/* Output route lookup that hands back a dst the caller may keep.
 *
 * Wraps ip6_route_output_flags_noref(), which does not take a refcnt on
 * the dst it returns, inside an RCU read-side section and acquires the
 * reference before leaving that section.
 *
 * @net:   network namespace; also supplies the ip6_null_entry fallback
 * @sk:    socket passed through unchanged to the noref lookup
 * @fl6:   flow passed through unchanged to the noref lookup
 * @flags: lookup flags passed through to the noref lookup
 *
 * Returns the dst from the noref lookup, or net->ipv6.ip6_null_entry
 * (with a reference taken via dst_hold()) when dst_hold_safe() fails.
 */
struct dst_entry *ip6_route_output_flags(struct net *net,
const struct sock *sk,
struct flowi6 *fl6,
int flags)
{
struct dst_entry *dst;
struct rt6_info *rt6;
/* The noref lookup result is only used under rcu_read_lock() here. */
rcu_read_lock();
dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
/* NOTE(review): dst is dereferenced without a NULL check below; this
 * assumes the noref lookup never returns NULL - confirm.
 */
rt6 = (struct rt6_info *)dst;
/* For dst cached in uncached_list, refcnt is already taken. */
/* Otherwise try to take a reference; if that fails, substitute the
 * namespace's null entry and hold that instead.
 */
if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
dst = &net->ipv6.ip6_null_entry->dst;
dst_hold(dst);
}
rcu_read_unlock();
return dst;
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags); EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
......
...@@ -118,6 +118,8 @@ EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); ...@@ -118,6 +118,8 @@ EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
* local and multicast addresses * local and multicast addresses
* @net: network namespace for device index lookup * @net: network namespace for device index lookup
* @fl6: IPv6 flow struct for lookup * @fl6: IPv6 flow struct for lookup
* This function does not hold refcnt on the returned dst.
* Caller must hold rcu_read_lock().
*/ */
struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
...@@ -126,9 +128,8 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, ...@@ -126,9 +128,8 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
struct dst_entry *dst = NULL; struct dst_entry *dst = NULL;
struct net_device *dev; struct net_device *dev;
WARN_ON_ONCE(!rcu_read_lock_held());
if (fl6->flowi6_oif) { if (fl6->flowi6_oif) {
rcu_read_lock();
dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
if (dev && netif_is_l3_slave(dev)) if (dev && netif_is_l3_slave(dev))
dev = netdev_master_upper_dev_get_rcu(dev); dev = netdev_master_upper_dev_get_rcu(dev);
...@@ -136,8 +137,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, ...@@ -136,8 +137,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
if (dev && netif_is_l3_master(dev) && if (dev && netif_is_l3_master(dev) &&
dev->l3mdev_ops->l3mdev_link_scope_lookup) dev->l3mdev_ops->l3mdev_link_scope_lookup)
dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6); dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6);
rcu_read_unlock();
} }
return dst; return dst;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment