Commit 17593357 authored by David S. Miller

Merge branch 'net-sched-skip_sw'

Asbjørn Sloth Tønnesen says:

====================
make skip_sw actually skip software

During development of flower-route[1], which I
recently presented at FOSDEM[2], I noticed that
CPU usage would increase the more rules I installed
into the hardware for IP forwarding offloading.

We use TC flower offload for the hottest
prefixes, and leave the long tail to the normal (non-TC)
Linux network stack for slow-path IP forwarding.
We therefore need both the hardware and software
datapath to perform well.

I found that skip_sw rules are quite expensive
in the kernel datapath, since they must be evaluated
and matched upon, before the kernel checks the
skip_sw flag.

This patchset optimizes the case where all rules
are skip_sw, by implementing a TC bypass for these
cases, where TC is only used as a control plane
for the hardware path.

v4:
- Rebased onto net-next, now that net-next is open again

v3: https://lore.kernel.org/netdev/20240306165813.656931-1-ast@fiberby.net/
- Patch 3:
  - Fix source_inline
  - Fix build failure, when CONFIG_NET_CLS without CONFIG_NET_CLS_ACT.

v2: https://lore.kernel.org/netdev/20240305144404.569632-1-ast@fiberby.net/
- Patch 1:
  - Add Reviewed-By from Jiri Pirko
- Patch 2:
  - Move code, to avoid forward declaration (Jiri).
- Patch 3:
  - Refactor to use a static key.
  - Add performance data for trapping, or sending
    a packet to a non-existent chain (as suggested by Marcelo).

v1: https://lore.kernel.org/netdev/20240215160458.1727237-1-ast@fiberby.net/

[1] flower-route
    https://github.com/fiberby-dk/flower-route

[2] FOSDEM talk
    https://fosdem.org/2024/schedule/event/fosdem-2024-3337-flying-higher-hardware-offloading-with-bird/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents fd2162a5 047f340b
...@@ -74,6 +74,15 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block) ...@@ -74,6 +74,15 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block)
return block && block->index; return block && block->index;
} }
#ifdef CONFIG_NET_CLS_ACT
DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
/*
 * Report whether @block currently asks for the software path to be skipped.
 * A NULL @block never requests bypass; otherwise the answer is the block's
 * bypass_wanted flag.
 */
static inline bool tcf_block_bypass_sw(struct tcf_block *block)
{
	if (!block)
		return false;

	return block->bypass_wanted;
}
#endif
static inline struct Qdisc *tcf_block_q(struct tcf_block *block) static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
{ {
WARN_ON(tcf_block_shared(block)); WARN_ON(tcf_block_shared(block));
......
...@@ -422,6 +422,7 @@ struct tcf_proto { ...@@ -422,6 +422,7 @@ struct tcf_proto {
*/ */
spinlock_t lock; spinlock_t lock;
bool deleting; bool deleting;
bool counted;
refcount_t refcnt; refcount_t refcnt;
struct rcu_head rcu; struct rcu_head rcu;
struct hlist_node destroy_ht_node; struct hlist_node destroy_ht_node;
...@@ -471,6 +472,9 @@ struct tcf_block { ...@@ -471,6 +472,9 @@ struct tcf_block {
struct flow_block flow_block; struct flow_block flow_block;
struct list_head owner_list; struct list_head owner_list;
bool keep_dst; bool keep_dst;
bool bypass_wanted;
atomic_t filtercnt; /* Number of filters */
atomic_t skipswcnt; /* Number of skip_sw filters */
atomic_t offloadcnt; /* Number of oddloaded filters */ atomic_t offloadcnt; /* Number of oddloaded filters */
unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */ unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */
......
...@@ -2083,6 +2083,11 @@ void net_dec_egress_queue(void) ...@@ -2083,6 +2083,11 @@ void net_dec_egress_queue(void)
EXPORT_SYMBOL_GPL(net_dec_egress_queue); EXPORT_SYMBOL_GPL(net_dec_egress_queue);
#endif #endif
#ifdef CONFIG_NET_CLS_ACT
/*
 * Static key, defined in the "false" state, that gates the per-block bypass
 * test in tc_run(): only when the key is enabled does tc_run() call
 * tcf_block_bypass_sw() on the miniq's block. tcf_maintain_bypass()
 * increments the key when a block starts wanting bypass and decrements it
 * when a block stops wanting it.
 */
DEFINE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
EXPORT_SYMBOL(tcf_bypass_check_needed_key);
#endif
DEFINE_STATIC_KEY_FALSE(netstamp_needed_key); DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
EXPORT_SYMBOL(netstamp_needed_key); EXPORT_SYMBOL(netstamp_needed_key);
#ifdef CONFIG_JUMP_LABEL #ifdef CONFIG_JUMP_LABEL
...@@ -3937,6 +3942,11 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, ...@@ -3937,6 +3942,11 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
if (!miniq) if (!miniq)
return ret; return ret;
if (static_branch_unlikely(&tcf_bypass_check_needed_key)) {
if (tcf_block_bypass_sw(miniq->block))
return ret;
}
tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false; tc_skb_cb(skb)->post_ct = false;
tcf_set_drop_reason(skb, *drop_reason); tcf_set_drop_reason(skb, *drop_reason);
......
...@@ -410,12 +410,48 @@ static void tcf_proto_get(struct tcf_proto *tp) ...@@ -410,12 +410,48 @@ static void tcf_proto_get(struct tcf_proto *tp)
refcount_inc(&tp->refcnt); refcount_inc(&tp->refcnt);
} }
/*
 * Re-evaluate whether @block wants the software path bypassed.
 *
 * Bypass is wanted when the block has at least one filter and every counted
 * filter is also counted as skip_sw (filtercnt == skipswcnt). When that
 * verdict differs from the cached block->bypass_wanted, the global
 * tcf_bypass_check_needed_key is incremented or decremented accordingly
 * (only with CONFIG_NET_CLS_ACT) and the cached flag is updated.
 *
 * NOTE(review): the only caller visible here invokes this with
 * block->cb_lock held for writing - confirm for any other callers.
 */
static void tcf_maintain_bypass(struct tcf_block *block)
{
	int nr_filters = atomic_read(&block->filtercnt);
	int nr_skip_sw = atomic_read(&block->skipswcnt);
	bool want_bypass = nr_filters > 0 && nr_filters == nr_skip_sw;

	/* Nothing to do if the verdict is unchanged. */
	if (want_bypass == block->bypass_wanted)
		return;

#ifdef CONFIG_NET_CLS_ACT
	if (want_bypass)
		static_branch_inc(&tcf_bypass_check_needed_key);
	else
		static_branch_dec(&tcf_bypass_check_needed_key);
#endif
	block->bypass_wanted = want_bypass;
}
/*
 * Account a filter in, or remove it from, @block's filter count and then
 * refresh the block's bypass state.
 *
 * @block:   block whose filtercnt is adjusted
 * @counted: caller-owned flag recording whether this filter is already
 *           included in filtercnt; updated here so that repeated calls with
 *           the same @add do not count twice
 * @add:     true to count the filter, false to uncount it
 *
 * Takes block->cb_lock for writing around the update, so the caller must not
 * already hold it (asserted via lockdep). tcf_maintain_bypass() is called on
 * every invocation, even when the count itself does not change.
 */
static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add)
{
	lockdep_assert_not_held(&block->cb_lock);

	down_write(&block->cb_lock);
	if (add && !*counted) {
		atomic_inc(&block->filtercnt);
		*counted = true;
	} else if (!add && *counted) {
		atomic_dec(&block->filtercnt);
		*counted = false;
	}
	tcf_maintain_bypass(block);
	up_write(&block->cb_lock);
}
static void tcf_chain_put(struct tcf_chain *chain); static void tcf_chain_put(struct tcf_chain *chain);
static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
bool sig_destroy, struct netlink_ext_ack *extack) bool sig_destroy, struct netlink_ext_ack *extack)
{ {
tp->ops->destroy(tp, rtnl_held, extack); tp->ops->destroy(tp, rtnl_held, extack);
tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false);
if (sig_destroy) if (sig_destroy)
tcf_proto_signal_destroyed(tp->chain, tp); tcf_proto_signal_destroyed(tp->chain, tp);
tcf_chain_put(tp->chain); tcf_chain_put(tp->chain);
...@@ -2364,6 +2400,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, ...@@ -2364,6 +2400,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
flags, extack); flags, extack);
if (err == 0) { if (err == 0) {
tcf_block_filter_cnt_update(block, &tp->counted, true);
tfilter_notify(net, skb, n, tp, block, q, parent, fh, tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held, extack); RTM_NEWTFILTER, false, rtnl_held, extack);
tfilter_put(tp, fh); tfilter_put(tp, fh);
...@@ -3483,6 +3520,8 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) ...@@ -3483,6 +3520,8 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
if (*flags & TCA_CLS_FLAGS_IN_HW) if (*flags & TCA_CLS_FLAGS_IN_HW)
return; return;
*flags |= TCA_CLS_FLAGS_IN_HW; *flags |= TCA_CLS_FLAGS_IN_HW;
if (tc_skip_sw(*flags))
atomic_inc(&block->skipswcnt);
atomic_inc(&block->offloadcnt); atomic_inc(&block->offloadcnt);
} }
...@@ -3491,6 +3530,8 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) ...@@ -3491,6 +3530,8 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
if (!(*flags & TCA_CLS_FLAGS_IN_HW)) if (!(*flags & TCA_CLS_FLAGS_IN_HW))
return; return;
*flags &= ~TCA_CLS_FLAGS_IN_HW; *flags &= ~TCA_CLS_FLAGS_IN_HW;
if (tc_skip_sw(*flags))
atomic_dec(&block->skipswcnt);
atomic_dec(&block->offloadcnt); atomic_dec(&block->offloadcnt);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment