Commit 8f3f6500 authored by David S. Miller

Merge branch 'TC-refactor-act_mirred-packets-re-injection'

Paolo Abeni says:

====================
TC: refactor act_mirred packets re-injection

This series aims to improve the performance of the act_mirred redirect action.
That action is used by OVS to represent TC S/W flows, and its current largest
bottleneck is the need for an skb_clone() for each packet.

The first 2 patches introduce some cleanup and safeguards to allow extending
tca_result - we will use it to store RCU protected redirect information - and
introduce a clear separation between user-space accessible tcfa_action
values and internal values accessible only by the kernel.
Then a new tcfa_action value is introduced: TC_ACT_REINSERT, similar to
TC_ACT_REDIRECT, but preserving the mirred semantics. This value is not
accessible from user-space.
The last patch exploits the newly introduced infrastructure in the act_mirred
action, to avoid a skb_clone, when possible.

Overall, the above gives a ~10% performance improvement in forwarding
throughput when using the TC S/W datapath.

v1 -> v2:
 - preserve the rcu lock in act_bpf
 - add and use a new action value to reinject the packets, preserving the mirred
   semantic

v2 -> v3:
 - renamed the new action to TC_ACT_REINJECT
 - TC_ACT_REINJECT is not exposed to user-space

v3 -> v4:
 - dropped the TC_ACT_REDIRECT patch
 - report failure via extack, too
 - rename the new action as TC_ACT_REINSERT
 - skip clone only if the control action doesn't touch tcf_result

v4 -> v5:
 - fix a couple of build issues reported by the kbuild bot
 - don't split messages
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents c87fffc5 e5cf1baf
......@@ -85,7 +85,7 @@ struct tc_action_ops {
size_t size;
struct module *owner;
int (*act)(struct sk_buff *, const struct tc_action *,
struct tcf_result *);
struct tcf_result *); /* called under RCU BH lock*/
int (*dump)(struct sk_buff *, struct tc_action *, int, int);
void (*cleanup)(struct tc_action *);
int (*lookup)(struct net *net, struct tc_action **a, u32 index,
......
......@@ -7,6 +7,9 @@
#include <net/sch_generic.h>
#include <net/act_api.h>
/* TC action not accessible from user space */
#define TC_ACT_REINSERT (TC_ACT_VALUE_MAX + 1)
/* Basic packet classifier frontend definitions. */
struct tcf_walker {
......
......@@ -235,6 +235,12 @@ struct tcf_result {
u32 classid;
};
const struct tcf_proto *goto_tp;
/* used by the TC_ACT_REINSERT action */
struct {
bool ingress;
struct gnet_stats_queue *qstats;
};
};
};
......@@ -285,6 +291,8 @@ struct tcf_proto {
/* Fast access part */
struct tcf_proto __rcu *next;
void __rcu *root;
/* called under RCU BH lock*/
int (*classify)(struct sk_buff *,
const struct tcf_proto *,
struct tcf_result *);
......@@ -567,6 +575,15 @@ static inline void skb_reset_tc(struct sk_buff *skb)
#endif
}
/* Report the skb's tc_redirected flag.
 *
 * The flag is only read when CONFIG_NET_CLS_ACT is defined; without it
 * this helper always answers false.
 */
static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
{
#ifndef CONFIG_NET_CLS_ACT
	return false;
#else
	return skb->tc_redirected;
#endif
}
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
......@@ -1106,4 +1123,17 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
struct mini_Qdisc __rcu **p_miniq);
/* Re-inject @skb in the direction recorded in @res.
 *
 * When res->ingress is set the packet is handed to netif_receive_skb(),
 * otherwise to dev_queue_xmit(). If that call returns non-zero and
 * res->qstats is not NULL, the overlimit counter of those stats is bumped.
 */
static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
{
	struct gnet_stats_queue *qstats = res->qstats;
	int err = res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb);

	/* nothing to account on success, or when no stats were supplied */
	if (!err || !qstats)
		return;

	qstats_overlimit_inc(qstats);
}
#endif
......@@ -45,6 +45,7 @@ enum {
* the skb and act like everything
* is alright.
*/
#define TC_ACT_VALUE_MAX TC_ACT_TRAP
/* There is a special kind of actions called "extended actions",
* which need a value parameter. These have a local opcode located in
......@@ -55,11 +56,12 @@ enum {
#define __TC_ACT_EXT_SHIFT 28
#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT)
#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1)
#define TC_ACT_EXT_CMP(combined, opcode) \
(((combined) & (~TC_ACT_EXT_VAL_MASK)) == opcode)
#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK))
#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode)
#define TC_ACT_JUMP __TC_ACT_EXT(1)
#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
#define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN
/* Action type identifiers*/
enum {
......
......@@ -4252,7 +4252,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
*/
if (skb_cloned(skb))
if (skb_cloned(skb) || skb_is_tc_redirected(skb))
return XDP_PASS;
/* XDP packets must be linear and must have sufficient headroom
......@@ -4602,6 +4602,10 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
__skb_push(skb, skb->mac_len);
skb_do_redirect(skb);
return NULL;
case TC_ACT_REINSERT:
/* this does not scrub the packet, and updates stats on error */
skb_tc_reinsert(skb, &cl_res);
return NULL;
default:
break;
}
......
......@@ -786,6 +786,15 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
return c;
}
/* Tell whether @action is a control action value this code accepts.
 *
 * Values without an extended opcode must not exceed TC_ACT_VALUE_MAX.
 * Values carrying an extended opcode are accepted when that opcode does
 * not exceed TC_ACT_EXT_OPCODE_MAX, or when the value is TC_ACT_UNSPEC.
 *
 * NOTE(review): the opcode is compared as a signed int, so an opcode with
 * the top bit set looks negative and appears to pass the upper-bound
 * check - confirm whether that is intended.
 */
static bool tcf_action_valid(int action)
{
	const int ext_opcode = TC_ACT_EXT_OPCODE(action);

	if (ext_opcode)
		return action == TC_ACT_UNSPEC ||
		       ext_opcode <= TC_ACT_EXT_OPCODE_MAX;

	return action <= TC_ACT_VALUE_MAX;
}
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
......@@ -895,6 +904,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
}
}
if (!tcf_action_valid(a->tcfa_action)) {
NL_SET_ERR_MSG(extack, "invalid action value, using TC_ACT_UNSPEC instead");
a->tcfa_action = TC_ACT_UNSPEC;
}
return a;
err_mod:
......
......@@ -561,15 +561,14 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
u32 update_flags;
int action;
rcu_read_lock();
params = rcu_dereference(p->params);
params = rcu_dereference_bh(p->params);
tcf_lastuse_update(&p->tcf_tm);
bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
action = READ_ONCE(p->tcf_action);
if (unlikely(action == TC_ACT_SHOT))
goto drop_stats;
goto drop;
update_flags = params->update_flags;
switch (tc_skb_protocol(skb)) {
......@@ -583,16 +582,11 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
break;
}
unlock:
rcu_read_unlock();
return action;
drop:
action = TC_ACT_SHOT;
drop_stats:
qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
goto unlock;
return TC_ACT_SHOT;
}
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
......
......@@ -820,14 +820,11 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_ife_params *p;
int ret;
rcu_read_lock();
p = rcu_dereference(ife->params);
p = rcu_dereference_bh(ife->params);
if (p->flags & IFE_ENCODE) {
ret = tcf_ife_encode(skb, a, res, p);
rcu_read_unlock();
return ret;
}
rcu_read_unlock();
return tcf_ife_decode(skb, a, res);
}
......
......@@ -25,6 +25,7 @@
#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <linux/tc_act/tc_mirred.h>
#include <net/tc_act/tc_mirred.h>
......@@ -49,6 +50,18 @@ static bool tcf_mirred_act_wants_ingress(int action)
}
}
/* Return true when @action is one of TC_ACT_SHOT, TC_ACT_STOLEN,
 * TC_ACT_QUEUED or TC_ACT_TRAP, false for every other value.
 */
static bool tcf_mirred_can_reinsert(int action)
{
	return action == TC_ACT_SHOT ||
	       action == TC_ACT_STOLEN ||
	       action == TC_ACT_QUEUED ||
	       action == TC_ACT_TRAP;
}
static void tcf_mirred_release(struct tc_action *a)
{
struct tcf_mirred *m = to_mirred(a);
......@@ -171,21 +184,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_mirred *m = to_mirred(a);
struct sk_buff *skb2 = skb;
bool m_mac_header_xmit;
struct net_device *dev;
struct sk_buff *skb2;
int retval, err = 0;
bool use_reinsert;
bool want_ingress;
bool is_redirect;
int m_eaction;
int mac_len;
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
rcu_read_lock();
m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
m_eaction = READ_ONCE(m->tcfm_eaction);
retval = READ_ONCE(m->tcf_action);
dev = rcu_dereference(m->tcfm_dev);
dev = rcu_dereference_bh(m->tcfm_dev);
if (unlikely(!dev)) {
pr_notice_once("tc mirred: target device is gone\n");
goto out;
......@@ -197,16 +212,25 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
/* we could easily avoid the clone only if called by ingress and clsact;
* since we can't easily detect the clsact caller, skip clone only for
* ingress - that covers the TC S/W datapath.
*/
is_redirect = tcf_mirred_is_act_redirect(m_eaction);
use_reinsert = skb_at_tc_ingress(skb) && is_redirect &&
tcf_mirred_can_reinsert(retval);
if (!use_reinsert) {
skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2)
goto out;
}
/* If action's target direction differs than filter's direction,
* and devices expect a mac header on xmit, then mac push/pull is
* needed.
*/
if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) &&
m_mac_header_xmit) {
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
if (!skb_at_tc_ingress(skb)) {
/* caught at egress, act ingress: pull mac */
mac_len = skb_network_header(skb) - skb_mac_header(skb);
......@@ -217,15 +241,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
}
}
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
/* mirror is always swallowed */
if (tcf_mirred_is_act_redirect(m_eaction)) {
if (is_redirect) {
skb2->tc_redirected = 1;
skb2->tc_from_ingress = skb2->tc_at_ingress;
/* let's the caller reinsert the packet, if possible */
if (use_reinsert) {
res->ingress = want_ingress;
res->qstats = this_cpu_ptr(m->common.cpu_qstats);
return TC_ACT_REINSERT;
}
}
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
if (!tcf_mirred_act_wants_ingress(m_eaction))
if (!want_ingress)
err = dev_queue_xmit(skb2);
else
err = netif_receive_skb(skb2);
......@@ -236,7 +268,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
rcu_read_unlock();
return retval;
}
......
......@@ -140,8 +140,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
retval = READ_ONCE(s->tcf_action);
rcu_read_lock();
psample_group = rcu_dereference(s->psample_group);
psample_group = rcu_dereference_bh(s->psample_group);
/* randomly sample packets according to rate */
if (psample_group && (prandom_u32() % s->rate == 0)) {
......@@ -165,7 +164,6 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
skb_pull(skb, skb->mac_len);
}
rcu_read_unlock();
return retval;
}
......
......@@ -43,8 +43,7 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&d->tcf_tm);
bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
rcu_read_lock();
params = rcu_dereference(d->params);
params = rcu_dereference_bh(d->params);
action = READ_ONCE(d->tcf_action);
if (params->flags & SKBEDIT_F_PRIORITY)
......@@ -77,14 +76,11 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
}
if (params->flags & SKBEDIT_F_PTYPE)
skb->pkt_type = params->ptype;
unlock:
rcu_read_unlock();
return action;
err:
qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
action = TC_ACT_SHOT;
goto unlock;
return TC_ACT_SHOT;
}
static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
......
......@@ -41,20 +41,14 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
* then MAX_EDIT_LEN needs to change appropriately
*/
err = skb_ensure_writable(skb, MAX_EDIT_LEN);
if (unlikely(err)) { /* best policy is to drop on the floor */
qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
return TC_ACT_SHOT;
}
if (unlikely(err)) /* best policy is to drop on the floor */
goto drop;
rcu_read_lock();
action = READ_ONCE(d->tcf_action);
if (unlikely(action == TC_ACT_SHOT)) {
qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
rcu_read_unlock();
return action;
}
if (unlikely(action == TC_ACT_SHOT))
goto drop;
p = rcu_dereference(d->skbmod_p);
p = rcu_dereference_bh(d->skbmod_p);
flags = p->flags;
if (flags & SKBMOD_F_DMAC)
ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
......@@ -62,7 +56,6 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
if (flags & SKBMOD_F_ETYPE)
eth_hdr(skb)->h_proto = p->eth_type;
rcu_read_unlock();
if (flags & SKBMOD_F_SWAPMAC) {
u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
......@@ -73,6 +66,10 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
}
return action;
drop:
qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
return TC_ACT_SHOT;
}
static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
......
......@@ -31,9 +31,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_tunnel_key_params *params;
int action;
rcu_read_lock();
params = rcu_dereference(t->params);
params = rcu_dereference_bh(t->params);
tcf_lastuse_update(&t->tcf_tm);
bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
......@@ -53,8 +51,6 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
break;
}
rcu_read_unlock();
return action;
}
......
......@@ -40,11 +40,9 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
if (skb_at_tc_ingress(skb))
skb_push_rcsum(skb, skb->mac_len);
rcu_read_lock();
action = READ_ONCE(v->tcf_action);
p = rcu_dereference(v->vlan_p);
p = rcu_dereference_bh(v->vlan_p);
switch (p->tcfv_action) {
case TCA_VLAN_ACT_POP:
......@@ -61,7 +59,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
case TCA_VLAN_ACT_MODIFY:
/* No-op if no vlan tag (either hw-accel or in-payload) */
if (!skb_vlan_tagged(skb))
goto unlock;
goto out;
/* extract existing tag (and guarantee no hw-accel tag) */
if (skb_vlan_tag_present(skb)) {
tci = skb_vlan_tag_get(skb);
......@@ -86,18 +84,15 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
BUG();
}
goto unlock;
drop:
action = TC_ACT_SHOT;
qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
unlock:
rcu_read_unlock();
out:
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, skb->mac_len);
return action;
drop:
qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
return TC_ACT_SHOT;
}
static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment