Commit b72137ec authored by Jakub Kicinski

Merge branch 'net-sched-conditional-notification-of-events-for-cls-and-act'

Pedro Tammela says:

====================
net/sched: conditional notification of events for cls and act

This is an optimization we have been leveraging on P4TC but we believe
it will benefit rtnl users in general.

It's common to allocate an skb, build a notification message and then
broadcast an event. In the absence of any user space listeners, these
resources (cpu and memory operations) are wasted. In cases where the subsystem
is lockless (such as in tc-flower) this waste is more prominent. For the
scenarios where the rtnl_lock is held it is not as prominent.

The idea is simple. Build and send the notification if and only if:
   - The user requests it via NLM_F_ECHO, or
   - Someone is listening to the rtnl group (tc mon)

On a simple test with tc-flower adding 1M entries, using just a single core,
there's already a noticeable difference in the cycles spent in tc_new_tfilter
with this patchset.

before:
   - 43.68% tc_new_tfilter
      + 31.73% fl_change
      + 6.35% tfilter_notify
      + 1.62% nlmsg_notify
        0.66% __tcf_qdisc_find.part.0
        0.64% __tcf_chain_get
        0.54% fl_get
      + 0.53% tcf_proto_lookup_ops

after:
   - 39.20% tc_new_tfilter
      + 34.58% fl_change
        0.69% __tcf_qdisc_find.part.0
        0.67% __tcf_chain_get
      + 0.61% tcf_proto_lookup_ops

Note, the above test is using iproute2:tc which execs a shell.
We expect people using netlink directly to observe even greater
reductions.

The qdisc side needs some refactoring of the notification routines to fit in
this new model, so those changes will be sent in a later patchset.
====================

Link: https://lore.kernel.org/r/20231208192847.714940-1-pctammela@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 70028b2e 93775590
......@@ -10,6 +10,13 @@
#include <uapi/linux/rtnetlink.h>
extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
/*
 * rtnetlink_send() wrapper that tolerates a missing message: a NULL @skb
 * means there is nothing to send and is reported as success (0); any
 * other @skb is handed to rtnetlink_send() with the arguments unchanged.
 */
static inline int rtnetlink_maybe_send(struct sk_buff *skb, struct net *net,
				       u32 pid, u32 group, int echo)
{
	if (!skb)
		return 0;

	return rtnetlink_send(skb, net, pid, group, echo);
}
extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid);
extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid,
u32 group, const struct nlmsghdr *nlh, gfp_t flags);
......@@ -130,4 +137,26 @@ extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
extern void rtnl_offload_xstats_notify(struct net_device *dev);
/*
 * Forward to netlink_has_listeners() using @net's rtnl socket and @group,
 * returning its result unchanged.
 */
static inline int rtnl_has_listeners(const struct net *net, u32 group)
{
	return netlink_has_listeners(net->rtnl, group);
}
/**
 * rtnl_notify_needed - decide whether a notification has to be built
 * @net: Pointer to the net namespace
 * @nlflags: netlink ingress message flags
 * @group: rtnl group
 *
 * A notification is needed when the ingress message carries NLM_F_ECHO,
 * or when rtnl_has_listeners() reports a non-zero result for @group in
 * @net. The listener check is skipped when NLM_F_ECHO is already set.
 *
 * Return: true if a notification is needed, false otherwise.
 */
static inline bool
rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group)
{
	if (nlflags & NLM_F_ECHO)
		return true;

	return rtnl_has_listeners(net, group) != 0;
}
#endif /* __LINUX_RTNETLINK_H */
......@@ -1785,31 +1785,45 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[])
return 0;
}
static int
tcf_reoffload_del_notify(struct net *net, struct tc_action *action)
static struct sk_buff *tcf_reoffload_del_notify_msg(struct net *net,
struct tc_action *action)
{
size_t attr_size = tcf_action_fill_size(action);
struct tc_action *actions[TCA_ACT_MAX_PRIO] = {
[0] = action,
};
const struct tc_action_ops *ops = action->ops;
struct sk_buff *skb;
int ret;
skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
GFP_KERNEL);
skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
if (!skb)
return -ENOBUFS;
return ERR_PTR(-ENOBUFS);
if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1, NULL) <= 0) {
kfree_skb(skb);
return -EINVAL;
return ERR_PTR(-EINVAL);
}
return skb;
}
static int tcf_reoffload_del_notify(struct net *net, struct tc_action *action)
{
const struct tc_action_ops *ops = action->ops;
struct sk_buff *skb;
int ret;
if (!rtnl_notify_needed(net, 0, RTNLGRP_TC)) {
skb = NULL;
} else {
skb = tcf_reoffload_del_notify_msg(net, action);
if (IS_ERR(skb))
return PTR_ERR(skb);
}
ret = tcf_idr_release_unsafe(action);
if (ret == ACT_P_DELETED) {
module_put(ops->owner);
ret = rtnetlink_send(skb, net, 0, RTNLGRP_TC, 0);
ret = rtnetlink_maybe_send(skb, net, 0, RTNLGRP_TC, 0);
} else {
kfree_skb(skb);
}
......@@ -1875,23 +1889,41 @@ int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb,
return 0;
}
static int
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
static struct sk_buff *tcf_del_notify_msg(struct net *net, struct nlmsghdr *n,
struct tc_action *actions[],
u32 portid, size_t attr_size,
struct netlink_ext_ack *extack)
{
int ret;
struct sk_buff *skb;
skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
GFP_KERNEL);
skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
if (!skb)
return -ENOBUFS;
return ERR_PTR(-ENOBUFS);
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
0, 2, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
kfree_skb(skb);
return -EINVAL;
return ERR_PTR(-EINVAL);
}
return skb;
}
static int tcf_del_notify(struct net *net, struct nlmsghdr *n,
struct tc_action *actions[], u32 portid,
size_t attr_size, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
int ret;
if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
skb = NULL;
} else {
skb = tcf_del_notify_msg(net, n, actions, portid, attr_size,
extack);
if (IS_ERR(skb))
return PTR_ERR(skb);
}
/* now do the delete */
......@@ -1902,9 +1934,8 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
return ret;
}
ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
return ret;
return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
}
static int
......@@ -1955,26 +1986,44 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
return ret;
}
static int
tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
static struct sk_buff *tcf_add_notify_msg(struct net *net, struct nlmsghdr *n,
struct tc_action *actions[],
u32 portid, size_t attr_size,
struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
GFP_KERNEL);
skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
if (!skb)
return -ENOBUFS;
return ERR_PTR(-ENOBUFS);
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
RTM_NEWACTION, 0, 0, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
return ERR_PTR(-EINVAL);
}
return skb;
}
static int tcf_add_notify(struct net *net, struct nlmsghdr *n,
struct tc_action *actions[], u32 portid,
size_t attr_size, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
skb = NULL;
} else {
skb = tcf_add_notify_msg(net, n, actions, portid, attr_size,
extack);
if (IS_ERR(skb))
return PTR_ERR(skb);
}
return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
......
......@@ -650,7 +650,7 @@ static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
void *tmplt_priv, u32 chain_index,
struct tcf_block *block, struct sk_buff *oskb,
u32 seq, u16 flags, bool unicast);
u32 seq, u16 flags);
static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
bool explicitly_created)
......@@ -685,8 +685,7 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
if (non_act_refcnt == chain->explicitly_created && !by_act) {
if (non_act_refcnt == 0)
tc_chain_notify_delete(tmplt_ops, tmplt_priv,
chain->index, block, NULL, 0, 0,
false);
chain->index, block, NULL, 0, 0);
/* Last reference to chain, no need to lock. */
chain->flushing = false;
}
......@@ -2053,6 +2052,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
int err = 0;
if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
return 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
......@@ -2075,13 +2077,16 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
u32 parent, void *fh, bool unicast, bool *last,
bool rtnl_held, struct netlink_ext_ack *extack)
u32 parent, void *fh, bool *last, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
int err;
if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
return tp->ops->delete(tp, fh, last, rtnl_held, extack);
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
......@@ -2100,11 +2105,8 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
return err;
}
if (unicast)
err = rtnl_unicast(skb, net, portid);
else
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
......@@ -2499,9 +2501,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
} else {
bool last;
err = tfilter_del_notify(net, skb, n, tp, block,
q, parent, fh, false, &last,
rtnl_held, extack);
err = tfilter_del_notify(net, skb, n, tp, block, q, parent, fh,
&last, rtnl_held, extack);
if (err)
goto errout;
......@@ -2906,6 +2907,9 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
struct sk_buff *skb;
int err = 0;
if (!unicast && !rtnl_notify_needed(net, flags, RTNLGRP_TC))
return 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
......@@ -2929,12 +2933,15 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
void *tmplt_priv, u32 chain_index,
struct tcf_block *block, struct sk_buff *oskb,
u32 seq, u16 flags, bool unicast)
u32 seq, u16 flags)
{
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
struct net *net = block->net;
struct sk_buff *skb;
if (!rtnl_notify_needed(net, flags, RTNLGRP_TC))
return 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
......@@ -2945,9 +2952,6 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
return -EINVAL;
}
if (unicast)
return rtnl_unicast(skb, net, portid);
return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment